Skip to content

Commit 42371ed

Browse files
TimWollacmb69nielsdos
committed
hash: Add SSE2 implementation of SHA-256
Implementation taken from Tarsnap/libcperciva@661752a. Co-authored-by: Christoph M. Becker <cmbecker69@gmx.de> Co-authored-by: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
1 parent 0f2f7c6 commit 42371ed

File tree

6 files changed

+279
-3
lines changed

6 files changed

+279
-3
lines changed

README.REDIST.BINS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
18. avifinfo (ext/standard/libavifinfo) see ext/standard/libavifinfo/LICENSE
1919
19. xxHash (ext/hash/xxhash)
2020
20. Lexbor (ext/dom/lexbor/lexbor) see ext/dom/lexbor/LICENSE
21-
21+
21. Portions of libcperciva (ext/hash/hash_sha_sse2.c) see the header in the source file
2222

2323
3. pcre2lib (ext/pcre)
2424

ext/hash/config.m4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ else
3333
PHP_HASH_CFLAGS="$PHP_HASH_CFLAGS -I@ext_srcdir@/$SHA3_DIR -DKeccakP200_excluded -DKeccakP400_excluded -DKeccakP800_excluded -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"
3434
fi
3535

36-
EXT_HASH_SOURCES="hash.c hash_md.c hash_sha.c hash_ripemd.c hash_haval.c \
36+
EXT_HASH_SOURCES="hash.c hash_md.c hash_sha.c hash_sha_sse2.c hash_ripemd.c hash_haval.c \
3737
hash_tiger.c hash_gost.c hash_snefru.c hash_whirlpool.c hash_adler32.c \
3838
hash_crc32.c hash_fnv.c hash_joaat.c $EXT_HASH_SHA3_SOURCES
3939
murmur/PMurHash.c murmur/PMurHash128.c hash_murmur.c hash_xxhash.c"

ext/hash/config.w32

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ if (PHP_MHASH != 'no') {
99

1010
PHP_HASH = 'yes';
1111

12-
EXTENSION('hash', 'hash.c hash_md.c hash_sha.c hash_ripemd.c hash_haval.c ' +
12+
EXTENSION('hash', 'hash.c hash_md.c hash_sha.c hash_sha_sse2.c hash_ripemd.c hash_haval.c ' +
1313
'hash_tiger.c hash_gost.c hash_snefru.c hash_whirlpool.c ' +
1414
'hash_adler32.c hash_crc32.c hash_joaat.c hash_fnv.c ' +
1515
'hash_sha3.c hash_murmur.c hash_xxhash.c', false);

ext/hash/hash_sha.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,14 @@ PHP_HASH_API void PHP_SHA256InitArgs(PHP_SHA256_CTX * context, ZEND_ATTRIBUTE_UN
160160
*/
161161
static void SHA256Transform(uint32_t state[8], const unsigned char block[64])
162162
{
163+
#if defined(__SSE2__)
164+
uint32_t tmp32[72];
165+
166+
SHA256_Transform_sse2(state, block, &tmp32[0], &tmp32[64]);
167+
ZEND_SECURE_ZERO((unsigned char*) tmp32, sizeof(tmp32));
168+
return;
169+
#endif
170+
163171
uint32_t a = state[0], b = state[1], c = state[2], d = state[3];
164172
uint32_t e = state[4], f = state[5], g = state[6], h = state[7];
165173
uint32_t x[16], T1, T2, W[64];

ext/hash/hash_sha_sse2.c

Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
/*-
2+
* Copyright 2021 Tarsnap Backup Inc.
3+
* All rights reserved.
4+
*
5+
* Redistribution and use in source and binary forms, with or without
6+
* modification, are permitted provided that the following conditions
7+
* are met:
8+
* 1. Redistributions of source code must retain the above copyright
9+
* notice, this list of conditions and the following disclaimer.
10+
* 2. Redistributions in binary form must reproduce the above copyright
11+
* notice, this list of conditions and the following disclaimer in the
12+
* documentation and/or other materials provided with the distribution.
13+
*
14+
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15+
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17+
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18+
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19+
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20+
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21+
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22+
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23+
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24+
* SUCH DAMAGE.
25+
*/
26+
27+
#include "php_hash.h"
28+
#include "php_hash_sha.h"
29+
30+
#ifdef __SSE2__
31+
# include <emmintrin.h>
32+
33+
/* Original implementation from libcperciva follows.
34+
*
35+
* Modified to use `PHP_STATIC_RESTRICT` for MSVC compatibility.
36+
*/
37+
38+
/**
39+
* mm_bswap_epi32(a):
40+
* Byte-swap each 32-bit word.
41+
*/
42+
static inline __m128i
43+
mm_bswap_epi32(__m128i a)
44+
{
45+
46+
/* Swap bytes in each 16-bit word. */
47+
a = _mm_or_si128(_mm_slli_epi16(a, 8), _mm_srli_epi16(a, 8));
48+
49+
/* Swap the two 16-bit halves within each 32-bit word. */
50+
a = _mm_shufflelo_epi16(a, _MM_SHUFFLE(2, 3, 0, 1));
51+
a = _mm_shufflehi_epi16(a, _MM_SHUFFLE(2, 3, 0, 1));
52+
53+
return (a);
54+
}
55+
56+
/* SHA256 round constants. */
57+
static const uint32_t Krnd[64] = {
58+
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
59+
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
60+
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
61+
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
62+
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
63+
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
64+
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
65+
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
66+
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
67+
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
68+
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
69+
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
70+
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
71+
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
72+
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
73+
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
74+
};
75+
76+
/* Elementary functions used by SHA256 */
77+
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
78+
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
79+
#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
80+
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
81+
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
82+
83+
/* SHA256 round function */
84+
#define RND(a, b, c, d, e, f, g, h, k) \
85+
h += S1(e) + Ch(e, f, g) + k; \
86+
d += h; \
87+
h += S0(a) + Maj(a, b, c)
88+
89+
/* Adjusted round function for rotating state */
90+
#define RNDr(S, W, i, ii) \
91+
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
92+
S[(66 - i) % 8], S[(67 - i) % 8], \
93+
S[(68 - i) % 8], S[(69 - i) % 8], \
94+
S[(70 - i) % 8], S[(71 - i) % 8], \
95+
W[i + ii] + Krnd[i + ii])
96+
97+
/* Message schedule computation */
98+
#define SHR32(x, n) (_mm_srli_epi32(x, n))
99+
#define ROTR32(x, n) (_mm_or_si128(SHR32(x, n), _mm_slli_epi32(x, (32-n))))
100+
#define s0_128(x) _mm_xor_si128(_mm_xor_si128( \
101+
ROTR32(x, 7), ROTR32(x, 18)), SHR32(x, 3))
102+
103+
static inline __m128i
104+
s1_128_high(__m128i a)
105+
{
106+
__m128i b;
107+
__m128i c;
108+
109+
/* ROTR, loading data as {B, B, A, A}; lanes 1 & 3 will be junk. */
110+
b = _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 1, 0, 0));
111+
c = _mm_xor_si128(_mm_srli_epi64(b, 17), _mm_srli_epi64(b, 19));
112+
113+
/* Shift and XOR with rotated data; lanes 1 & 3 will be junk. */
114+
c = _mm_xor_si128(c, _mm_srli_epi32(b, 10));
115+
116+
/* Shuffle good data back and zero unwanted lanes. */
117+
c = _mm_shuffle_epi32(c, _MM_SHUFFLE(2, 0, 2, 0));
118+
c = _mm_slli_si128(c, 8);
119+
120+
return (c);
121+
}
122+
123+
static inline __m128i
124+
s1_128_low(__m128i a)
125+
{
126+
__m128i b;
127+
__m128i c;
128+
129+
/* ROTR, loading data as {D, D, C, C}; lanes 1 & 3 will be junk. */
130+
b = _mm_shuffle_epi32(a, _MM_SHUFFLE(3, 3, 2, 2));
131+
c = _mm_xor_si128(_mm_srli_epi64(b, 17), _mm_srli_epi64(b, 19));
132+
133+
/* Shift and XOR with rotated data; lanes 1 & 3 will be junk. */
134+
c = _mm_xor_si128(c, _mm_srli_epi32(b, 10));
135+
136+
/* Shuffle good data back and zero unwanted lanes. */
137+
c = _mm_shuffle_epi32(c, _MM_SHUFFLE(2, 0, 2, 0));
138+
c = _mm_srli_si128(c, 8);
139+
140+
return (c);
141+
}
142+
143+
/**
144+
* SPAN_ONE_THREE(a, b):
145+
* Combine the upper three words of ${a} with the lowest word of ${b}. This
146+
* could also be thought of as returning bits [159:32] of the 256-bit value
147+
* consisting of (b[127:0] a[127:0]). In other words, set:
148+
* dst[31:0] := a[63:32]
149+
* dst[63:32] := a[95:64]
150+
* dst[95:64] := a[127:96]
151+
* dst[127:96] := b[31:0]
152+
*/
153+
#define SPAN_ONE_THREE(a, b) (_mm_shuffle_epi32(_mm_castps_si128( \
154+
_mm_move_ss(_mm_castsi128_ps(a), _mm_castsi128_ps(b))), \
155+
_MM_SHUFFLE(0, 3, 2, 1)))
156+
157+
/**
158+
* MSG4(X0, X1, X2, X3):
159+
* Calculate the next four values of the message schedule. If we define
160+
* ${W[j]} as the first unknown value in the message schedule, then the input
161+
* arguments are:
162+
* X0 = W[j - 16] : W[j - 13]
163+
* X1 = W[j - 12] : W[j - 9]
164+
* X2 = W[j - 8] : W[j - 5]
165+
* X3 = W[j - 4] : W[j - 1]
166+
* This function therefore calculates:
167+
* X4 = W[j + 0] : W[j + 3]
168+
*/
169+
static inline __m128i
170+
MSG4(__m128i X0, __m128i X1, __m128i X2, __m128i X3)
171+
{
172+
__m128i X4;
173+
__m128i Xj_minus_seven, Xj_minus_fifteen;
174+
175+
/* Set up variables which span X values. */
176+
Xj_minus_seven = SPAN_ONE_THREE(X2, X3);
177+
Xj_minus_fifteen = SPAN_ONE_THREE(X0, X1);
178+
179+
/* Begin computing X4. */
180+
X4 = _mm_add_epi32(X0, Xj_minus_seven);
181+
X4 = _mm_add_epi32(X4, s0_128(Xj_minus_fifteen));
182+
183+
/* First half of s1. */
184+
X4 = _mm_add_epi32(X4, s1_128_low(X3));
185+
186+
/* Second half of s1; this depends on the above value of X4. */
187+
X4 = _mm_add_epi32(X4, s1_128_high(X4));
188+
189+
return (X4);
190+
}
191+
192+
/**
193+
* SHA256_Transform_sse2(state, block, W, S):
194+
* Compute the SHA256 block compression function, transforming ${state} using
195+
* the data in ${block}. This implementation uses x86 SSE2 instructions, and
196+
* should only be used if _SSE2 is defined and cpusupport_x86_sse2() returns
197+
* nonzero. The arrays W and S may be filled with sensitive data, and should
198+
* be cleared by the caller.
199+
*/
200+
void
201+
SHA256_Transform_sse2(uint32_t state[PHP_STATIC_RESTRICT 8],
202+
const uint8_t block[PHP_STATIC_RESTRICT 64], uint32_t W[PHP_STATIC_RESTRICT 64],
203+
uint32_t S[PHP_STATIC_RESTRICT 8])
204+
{
205+
__m128i Y[4];
206+
int i;
207+
208+
/* 1. Prepare the first part of the message schedule W. */
209+
Y[0] = mm_bswap_epi32(_mm_loadu_si128((const __m128i *)&block[0]));
210+
_mm_storeu_si128((__m128i *)&W[0], Y[0]);
211+
Y[1] = mm_bswap_epi32(_mm_loadu_si128((const __m128i *)&block[16]));
212+
_mm_storeu_si128((__m128i *)&W[4], Y[1]);
213+
Y[2] = mm_bswap_epi32(_mm_loadu_si128((const __m128i *)&block[32]));
214+
_mm_storeu_si128((__m128i *)&W[8], Y[2]);
215+
Y[3] = mm_bswap_epi32(_mm_loadu_si128((const __m128i *)&block[48]));
216+
_mm_storeu_si128((__m128i *)&W[12], Y[3]);
217+
218+
/* 2. Initialize working variables. */
219+
memcpy(S, state, 32);
220+
221+
/* 3. Mix. */
222+
for (i = 0; i < 64; i += 16) {
223+
RNDr(S, W, 0, i);
224+
RNDr(S, W, 1, i);
225+
RNDr(S, W, 2, i);
226+
RNDr(S, W, 3, i);
227+
RNDr(S, W, 4, i);
228+
RNDr(S, W, 5, i);
229+
RNDr(S, W, 6, i);
230+
RNDr(S, W, 7, i);
231+
RNDr(S, W, 8, i);
232+
RNDr(S, W, 9, i);
233+
RNDr(S, W, 10, i);
234+
RNDr(S, W, 11, i);
235+
RNDr(S, W, 12, i);
236+
RNDr(S, W, 13, i);
237+
RNDr(S, W, 14, i);
238+
RNDr(S, W, 15, i);
239+
240+
if (i == 48)
241+
break;
242+
Y[0] = MSG4(Y[0], Y[1], Y[2], Y[3]);
243+
_mm_storeu_si128((__m128i *)&W[16 + i + 0], Y[0]);
244+
Y[1] = MSG4(Y[1], Y[2], Y[3], Y[0]);
245+
_mm_storeu_si128((__m128i *)&W[16 + i + 4], Y[1]);
246+
Y[2] = MSG4(Y[2], Y[3], Y[0], Y[1]);
247+
_mm_storeu_si128((__m128i *)&W[16 + i + 8], Y[2]);
248+
Y[3] = MSG4(Y[3], Y[0], Y[1], Y[2]);
249+
_mm_storeu_si128((__m128i *)&W[16 + i + 12], Y[3]);
250+
}
251+
252+
/* 4. Mix local working variables into global state. */
253+
for (i = 0; i < 8; i++)
254+
state[i] += S[i];
255+
}
256+
257+
#endif

ext/hash/php_hash_sha.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,17 @@ typedef struct {
4545
#define PHP_SHA256Init(ctx) PHP_SHA256InitArgs(ctx, NULL)
4646
PHP_HASH_API void PHP_SHA256InitArgs(PHP_SHA256_CTX *, ZEND_ATTRIBUTE_UNUSED HashTable *);
4747
PHP_HASH_API void PHP_SHA256Update(PHP_SHA256_CTX *, const unsigned char *, size_t);
48+
49+
#ifdef _MSC_VER
50+
# define PHP_STATIC_RESTRICT
51+
#else
52+
# define PHP_STATIC_RESTRICT static restrict
53+
#endif
54+
55+
#if defined(__SSE2__)
56+
void SHA256_Transform_sse2(uint32_t state[PHP_STATIC_RESTRICT 8], const uint8_t block[PHP_STATIC_RESTRICT 64], uint32_t W[PHP_STATIC_RESTRICT 64], uint32_t S[PHP_STATIC_RESTRICT 8]);
57+
#endif
58+
4859
PHP_HASH_API void PHP_SHA256Final(unsigned char[32], PHP_SHA256_CTX *);
4960

5061
/* SHA384 context */

0 commit comments

Comments
 (0)