Skip to content

Commit 72e91e9

Browse files
committed
hash: Add MurmurHash3 with streaming support
The implementation is based on the upstream PMurHash. The following variants are implemented: murmur3a (32-bit hash), murmur3c (128-bit hash for x86), and murmur3f (128-bit hash for x64). Custom seed support is not targeted by this implementation; it would require a major change to the API so that custom arguments can be passed through `hash_init`. For now, the starting hash is always zero. Fixes bug #68109, closes #6059. Signed-off-by: Anatol Belski <ab@php.net> Co-Developed-by: Michael Wallner <mike@php.net> Signed-off-by: Michael Wallner <mike@php.net>
1 parent 831abe2 commit 72e91e9

17 files changed

+1392
-7
lines changed

ext/hash/config.m4

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,12 @@ fi
3535

3636
EXT_HASH_SOURCES="hash.c hash_md.c hash_sha.c hash_ripemd.c hash_haval.c \
3737
hash_tiger.c hash_gost.c hash_snefru.c hash_whirlpool.c hash_adler32.c \
38-
hash_crc32.c hash_fnv.c hash_joaat.c $EXT_HASH_SHA3_SOURCES"
38+
hash_crc32.c hash_fnv.c hash_joaat.c $EXT_HASH_SHA3_SOURCES
39+
murmur/PMurHash.c murmur/PMurHash128.c hash_murmur.c"
3940
EXT_HASH_HEADERS="php_hash.h php_hash_md.h php_hash_sha.h php_hash_ripemd.h \
4041
php_hash_haval.h php_hash_tiger.h php_hash_gost.h php_hash_snefru.h \
4142
php_hash_whirlpool.h php_hash_adler32.h php_hash_crc32.h \
42-
php_hash_fnv.h php_hash_joaat.h php_hash_sha3.h"
43+
php_hash_fnv.h php_hash_joaat.h php_hash_sha3.h php_hash_murmur.h"
4344

4445
PHP_NEW_EXTENSION(hash, $EXT_HASH_SOURCES, 0,,$PHP_HASH_CFLAGS)
4546
PHP_INSTALL_HEADERS(ext/hash, $EXT_HASH_HEADERS)

ext/hash/config.w32

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ PHP_HASH = 'yes';
1111
EXTENSION('hash', 'hash.c hash_md.c hash_sha.c hash_ripemd.c hash_haval.c ' +
1212
'hash_tiger.c hash_gost.c hash_snefru.c hash_whirlpool.c ' +
1313
'hash_adler32.c hash_crc32.c hash_joaat.c hash_fnv.c ' +
14-
'hash_sha3.c', false);
14+
'hash_sha3.c hash_murmur.c', false);
1515

1616
var hash_sha3_dir = 'ext/hash/sha3/generic' + (X64 ? '64' : '32') + 'lc';
1717

@@ -28,7 +28,14 @@ if (!CHECK_HEADER_ADD_INCLUDE('KeccakHash.h', 'CFLAGS_HASH', hash_sha3_dir)) {
2828

2929
ADD_FLAG('CFLAGS_HASH', '/DKeccakP200_excluded /DKeccakP400_excluded /DKeccakP800_excluded /DZEND_ENABLE_STATIC_TSRMLS_CACHE=1');
3030

31+
var hash_murmur_dir = 'ext/hash/murmur';
32+
if (!CHECK_HEADER_ADD_INCLUDE('PMurHash.h', 'CFLAGS_HASH', hash_murmur_dir)) {
33+
ERROR('Unable to locate murmur headers');
34+
}
35+
ADD_SOURCES(hash_murmur_dir, 'PMurHash.c PMurHash128.c', 'hash');
36+
3137
PHP_INSTALL_HEADERS('ext/hash/', 'php_hash.h php_hash_md.h php_hash_sha.h ' +
3238
'php_hash_ripemd.h php_hash_haval.h php_hash_tiger.h ' +
3339
'php_hash_gost.h php_hash_snefru.h php_hash_whirlpool.h ' +
34-
'php_hash_adler32.h php_hash_crc32.h php_hash_sha3.h');
40+
'php_hash_adler32.h php_hash_crc32.h php_hash_sha3.h ' +
41+
'php_hash_murmur.h');

ext/hash/hash.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ struct mhash_bc_entry {
5252
int value;
5353
};
5454

55-
#define MHASH_NUM_ALGOS 35
55+
#define MHASH_NUM_ALGOS 38
5656

5757
static struct mhash_bc_entry mhash_to_hash[MHASH_NUM_ALGOS] = {
5858
{"CRC32", "crc32", 0}, /* used by bzip */
@@ -90,6 +90,9 @@ static struct mhash_bc_entry mhash_to_hash[MHASH_NUM_ALGOS] = {
9090
{"FNV1A64", "fnv1a64", 32},
9191
{"JOAAT", "joaat", 33},
9292
{"CRC32C", "crc32c", 34}, /* Castagnoli's CRC, used by iSCSI, SCTP, Btrfs, ext4, etc */
93+
{"MURMUR3A", "murmur3a", 35},
94+
{"MURMUR3C", "murmur3c", 36},
95+
{"MURMUR3F", "murmur3f", 37},
9396
};
9497
#endif
9598

@@ -1586,6 +1589,9 @@ PHP_MINIT_FUNCTION(hash)
15861589
php_hash_register_algo("fnv164", &php_hash_fnv164_ops);
15871590
php_hash_register_algo("fnv1a64", &php_hash_fnv1a64_ops);
15881591
php_hash_register_algo("joaat", &php_hash_joaat_ops);
1592+
php_hash_register_algo("murmur3a", &php_hash_murmur3a_ops);
1593+
php_hash_register_algo("murmur3c", &php_hash_murmur3c_ops);
1594+
php_hash_register_algo("murmur3f", &php_hash_murmur3f_ops);
15891595

15901596
PHP_HASH_HAVAL_REGISTER(3,128);
15911597
PHP_HASH_HAVAL_REGISTER(3,160);

ext/hash/hash_murmur.c

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
/*
2+
+----------------------------------------------------------------------+
3+
| Copyright (c) The PHP Group |
4+
+----------------------------------------------------------------------+
5+
| This source file is subject to version 3.01 of the PHP license, |
6+
| that is bundled with this package in the file LICENSE, and is |
7+
| available through the world-wide-web at the following url: |
8+
| http://www.php.net/license/3_01.txt |
9+
| If you did not receive a copy of the PHP license and are unable to |
10+
| obtain it through the world-wide-web, please send a note to |
11+
| license@php.net so we can mail you a copy immediately. |
12+
+----------------------------------------------------------------------+
13+
| Author: Anatol Belski <ab@php.net> |
14+
+----------------------------------------------------------------------+
15+
*/
16+
17+
#include "php_hash.h"
18+
#include "php_hash_murmur.h"
19+
20+
#include "murmur/PMurHash.h"
21+
#include "murmur/PMurHash128.h"
22+
23+
24+
/*
 * Ops table for the "murmur3a" algorithm (32-bit MurmurHash3). hash.c
 * registers it with php_hash_register_algo("murmur3a", &php_hash_murmur3a_ops).
 *
 * The initializer is positional: algorithm name, the init/update/final/copy
 * callbacks defined in this file, the generic php_hash_serialize and
 * php_hash_unserialize handlers, the serialization spec, then numeric fields.
 * NOTE(review): the exact field meanings come from the php_hash_ops
 * declaration, which is not part of this file -- confirm against php_hash.h.
 */
const php_hash_ops php_hash_murmur3a_ops = {
25+
"murmur3a",
26+
(php_hash_init_func_t) PHP_MURMUR3AInit,
27+
(php_hash_update_func_t) PHP_MURMUR3AUpdate,
28+
(php_hash_final_func_t) PHP_MURMUR3AFinal,
29+
(php_hash_copy_func_t) PHP_MURMUR3ACopy,
30+
php_hash_serialize,
31+
php_hash_unserialize,
32+
PHP_MURMUR3A_SPEC,
33+
4, /* presumably the digest size in bytes (PHP_MURMUR3AFinal writes 4 bytes) -- TODO confirm */
34+
4, /* presumably the block size in bytes -- TODO confirm */
35+
sizeof(PHP_MURMUR3A_CTX), /* size of the per-hash context structure */
36+
0 /* NOTE(review): trailing flag; meaning not visible here -- confirm against php_hash_ops */
37+
};
38+
39+
/*
 * Put a murmur3a context into its starting state: no bytes consumed,
 * nothing carried over, and a running hash of zero (no custom seed).
 */
PHP_HASH_API void PHP_MURMUR3AInit(PHP_MURMUR3A_CTX *ctx)
{
	ctx->len = 0;
	ctx->carry = 0;
	ctx->h = 0;
}
45+
46+
/*
 * Feed `len` bytes starting at `in` into the streaming murmur3a state.
 *
 * The running hash and carry are advanced by PMurHash32_Process(); the total
 * byte count is tracked separately in ctx->len because the finalization step
 * needs it.
 */
PHP_HASH_API void PHP_MURMUR3AUpdate(PHP_MURMUR3A_CTX *ctx, const unsigned char *in, size_t len)
{
	/* Mix the new bytes in; only h and carry are handed to the hasher. */
	PMurHash32_Process(&ctx->h, &ctx->carry, in, len);

	/* Account for the bytes just consumed. */
	ctx->len += len;
}
51+
52+
/*
 * Finalize the murmur3a hash and write it to `digest` as 4 bytes, most
 * significant byte first.
 *
 * Note that the finalized value is stored back into ctx->h, so the context
 * no longer holds the intermediate streaming state afterwards.
 */
PHP_HASH_API void PHP_MURMUR3AFinal(unsigned char digest[4], PHP_MURMUR3A_CTX *ctx)
{
	int shift = 24;
	size_t i;

	ctx->h = PMurHash32_Result(ctx->h, ctx->carry, ctx->len);

	/* Emit bits 31..24 first, then step down one byte at a time. */
	for (i = 0; i < 4; i++, shift -= 8) {
		digest[i] = (unsigned char)((ctx->h >> shift) & 0xff);
	}
}
61+
62+
/*
 * Duplicate a murmur3a streaming state from `orig_context` into
 * `copy_context`, field by field.
 *
 * `ops` is accepted to match the copy-callback signature but is not used.
 * Always returns SUCCESS.
 */
PHP_HASH_API int PHP_MURMUR3ACopy(const php_hash_ops *ops, PHP_MURMUR3A_CTX *orig_context, PHP_MURMUR3A_CTX *copy_context)
{
	copy_context->len = orig_context->len;
	copy_context->carry = orig_context->carry;
	copy_context->h = orig_context->h;

	return SUCCESS;
}
69+
70+
/*
 * Ops table for the "murmur3c" algorithm (128-bit MurmurHash3, x86 variant).
 * hash.c registers it with
 * php_hash_register_algo("murmur3c", &php_hash_murmur3c_ops).
 *
 * The initializer is positional: algorithm name, the init/update/final/copy
 * callbacks defined in this file, the generic php_hash_serialize and
 * php_hash_unserialize handlers, the serialization spec, then numeric fields.
 * NOTE(review): the exact field meanings come from the php_hash_ops
 * declaration, which is not part of this file -- confirm against php_hash.h.
 */
const php_hash_ops php_hash_murmur3c_ops = {
71+
"murmur3c",
72+
(php_hash_init_func_t) PHP_MURMUR3CInit,
73+
(php_hash_update_func_t) PHP_MURMUR3CUpdate,
74+
(php_hash_final_func_t) PHP_MURMUR3CFinal,
75+
(php_hash_copy_func_t) PHP_MURMUR3CCopy,
76+
php_hash_serialize,
77+
php_hash_unserialize,
78+
PHP_MURMUR3C_SPEC,
79+
16, /* presumably the digest size in bytes (PHP_MURMUR3CFinal writes 16 bytes) -- TODO confirm */
80+
4, /* presumably the block size in bytes -- TODO confirm */
81+
sizeof(PHP_MURMUR3C_CTX), /* size of the per-hash context structure */
82+
0 /* NOTE(review): trailing flag; meaning not visible here -- confirm against php_hash_ops */
83+
};
84+
85+
/*
 * Put a murmur3c context into its starting state: no bytes consumed, and
 * both the carry and the running hash zero-filled (no custom seed).
 */
PHP_HASH_API void PHP_MURMUR3CInit(PHP_MURMUR3C_CTX *ctx)
{
	ctx->len = 0;
	memset(&ctx->carry, 0, sizeof(ctx->carry));
	memset(&ctx->h, 0, sizeof(ctx->h));
}
91+
92+
/*
 * Feed `len` bytes starting at `in` into the streaming murmur3c state.
 *
 * The running hash and carry are advanced by PMurHash128x86_Process(); the
 * total byte count is tracked separately in ctx->len because the
 * finalization step needs it.
 */
PHP_HASH_API void PHP_MURMUR3CUpdate(PHP_MURMUR3C_CTX *ctx, const unsigned char *in, size_t len)
{
	/* Mix the new bytes in; only h and carry are handed to the hasher. */
	PMurHash128x86_Process(ctx->h, ctx->carry, in, len);

	/* Account for the bytes just consumed. */
	ctx->len += len;
}
97+
98+
/*
 * Finalize the murmur3c hash and write it to `digest` as 16 bytes: the four
 * 32-bit result words in order, each emitted most significant byte first.
 *
 * The parameter was previously declared `digest[4]` although 16 bytes are
 * written. The bound is now `digest[16]`; the function type is unchanged
 * because an array parameter is adjusted to a pointer, but the declaration
 * no longer understates the required buffer size.
 * NOTE(review): the prototype in php_hash_murmur.h (not shown here) should
 * declare the same bound.
 *
 * The context itself is only read; the result is computed into a local.
 */
PHP_HASH_API void PHP_MURMUR3CFinal(unsigned char digest[16], PHP_MURMUR3C_CTX *ctx)
{
	uint32_t h[4] = {0};
	size_t i;

	PMurHash128x86_Result(ctx->h, ctx->carry, ctx->len, h);

	/* Serialize each 32-bit word big-endian into its 4-byte slot. */
	for (i = 0; i < 4; i++) {
		unsigned char *out = digest + 4 * i;

		out[0] = (unsigned char)((h[i] >> 24) & 0xff);
		out[1] = (unsigned char)((h[i] >> 16) & 0xff);
		out[2] = (unsigned char)((h[i] >> 8) & 0xff);
		out[3] = (unsigned char)(h[i] & 0xff);
	}
}
120+
121+
/*
 * Duplicate a murmur3c streaming state from `orig_context` into
 * `copy_context`: byte count, carry and running hash.
 *
 * `ops` is accepted to match the copy-callback signature but is not used.
 * Always returns SUCCESS.
 */
PHP_HASH_API int PHP_MURMUR3CCopy(const php_hash_ops *ops, PHP_MURMUR3C_CTX *orig_context, PHP_MURMUR3C_CTX *copy_context)
{
	copy_context->len = orig_context->len;
	memcpy(&copy_context->carry, &orig_context->carry, sizeof(orig_context->carry));
	memcpy(&copy_context->h, &orig_context->h, sizeof(orig_context->h));

	return SUCCESS;
}
128+
129+
/*
 * Ops table for the "murmur3f" algorithm (128-bit MurmurHash3, x64 variant).
 * hash.c registers it with
 * php_hash_register_algo("murmur3f", &php_hash_murmur3f_ops).
 *
 * The initializer is positional: algorithm name, the init/update/final/copy
 * callbacks defined in this file, the generic php_hash_serialize and
 * php_hash_unserialize handlers, the serialization spec, then numeric fields.
 * NOTE(review): the exact field meanings come from the php_hash_ops
 * declaration, which is not part of this file -- confirm against php_hash.h.
 */
const php_hash_ops php_hash_murmur3f_ops = {
130+
"murmur3f",
131+
(php_hash_init_func_t) PHP_MURMUR3FInit,
132+
(php_hash_update_func_t) PHP_MURMUR3FUpdate,
133+
(php_hash_final_func_t) PHP_MURMUR3FFinal,
134+
(php_hash_copy_func_t) PHP_MURMUR3FCopy,
135+
php_hash_serialize,
136+
php_hash_unserialize,
137+
PHP_MURMUR3F_SPEC,
138+
16, /* presumably the digest size in bytes (PHP_MURMUR3FFinal writes 16 bytes) -- TODO confirm */
139+
8, /* presumably the block size in bytes -- TODO confirm */
140+
sizeof(PHP_MURMUR3F_CTX), /* size of the per-hash context structure */
141+
0 /* NOTE(review): trailing flag; meaning not visible here -- confirm against php_hash_ops */
142+
};
143+
144+
/*
 * Put a murmur3f context into its starting state: no bytes consumed, and
 * both the carry and the running hash zero-filled (no custom seed).
 */
PHP_HASH_API void PHP_MURMUR3FInit(PHP_MURMUR3F_CTX *ctx)
{
	ctx->len = 0;
	memset(&ctx->carry, 0, sizeof(ctx->carry));
	memset(&ctx->h, 0, sizeof(ctx->h));
}
150+
151+
/*
 * Feed `len` bytes starting at `in` into the streaming murmur3f state.
 *
 * The running hash and carry are advanced by PMurHash128x64_Process(); the
 * total byte count is tracked separately in ctx->len because the
 * finalization step needs it.
 */
PHP_HASH_API void PHP_MURMUR3FUpdate(PHP_MURMUR3F_CTX *ctx, const unsigned char *in, size_t len)
{
	/* Mix the new bytes in; only h and carry are handed to the hasher. */
	PMurHash128x64_Process(ctx->h, ctx->carry, in, len);

	/* Account for the bytes just consumed. */
	ctx->len += len;
}
156+
157+
/*
 * Finalize the murmur3f hash and write it to `digest` as 16 bytes: the two
 * 64-bit result words in order, each emitted most significant byte first.
 *
 * The parameter was previously declared `digest[4]` although 16 bytes are
 * written. The bound is now `digest[16]`; the function type is unchanged
 * because an array parameter is adjusted to a pointer, but the declaration
 * no longer understates the required buffer size.
 * NOTE(review): the prototype in php_hash_murmur.h (not shown here) should
 * declare the same bound.
 *
 * The context itself is only read; the result is computed into a local.
 */
PHP_HASH_API void PHP_MURMUR3FFinal(unsigned char digest[16], PHP_MURMUR3F_CTX *ctx)
{
	uint64_t h[2] = {0};
	size_t word;

	PMurHash128x64_Result(ctx->h, ctx->carry, ctx->len, h);

	/* Serialize each 64-bit word big-endian into its 8-byte slot. */
	for (word = 0; word < 2; word++) {
		unsigned char *out = digest + 8 * word;
		int shift = 56;
		size_t i;

		for (i = 0; i < 8; i++, shift -= 8) {
			out[i] = (unsigned char)((h[word] >> shift) & 0xff);
		}
	}
}
179+
180+
/*
 * Duplicate a murmur3f streaming state from `orig_context` into
 * `copy_context`: byte count, carry and running hash.
 *
 * `ops` is accepted to match the copy-callback signature but is not used.
 * Always returns SUCCESS.
 */
PHP_HASH_API int PHP_MURMUR3FCopy(const php_hash_ops *ops, PHP_MURMUR3F_CTX *orig_context, PHP_MURMUR3F_CTX *copy_context)
{
	copy_context->len = orig_context->len;
	memcpy(&copy_context->carry, &orig_context->carry, sizeof(orig_context->carry));
	memcpy(&copy_context->h, &orig_context->h, sizeof(orig_context->h));

	return SUCCESS;
}

0 commit comments

Comments
 (0)