Skip to content

Commit 44c9095

Browse files
committed
hash: Add MurmurHash3 with streaming support
The implementation is based on the upstream PMurHash. The following variants are implemented: murmur3a (32-bit hash), murmur3c (128-bit hash for x86), and murmur3f (128-bit hash for x64). Custom seed support is not targeted by this implementation; it would need a major change to the API so that custom arguments can be passed through `hash_init`. For now, the starting hash is always zero. Signed-off-by: Anatol Belski <ab@php.net>
1 parent a9f6572 commit 44c9095

17 files changed

+1421
-6
lines changed

ext/hash/config.m4

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,15 @@ EXT_HASH_SOURCES="hash.c hash_md.c hash_sha.c hash_ripemd.c hash_haval.c \
3939
EXT_HASH_HEADERS="php_hash.h php_hash_md.h php_hash_sha.h php_hash_ripemd.h \
4040
php_hash_haval.h php_hash_tiger.h php_hash_gost.h php_hash_snefru.h \
4141
php_hash_whirlpool.h php_hash_adler32.h php_hash_crc32.h \
42-
php_hash_fnv.h php_hash_joaat.h php_hash_sha3.h"
42+
php_hash_fnv.h php_hash_joaat.h php_hash_sha3.h php_hash_murmur.h"
43+
44+
MURMUR_DIR=murmur
45+
PHP_HASH_CXX_FLAGS="-Iext/hash/$MURMUR_DIR"
4346

4447
PHP_NEW_EXTENSION(hash, $EXT_HASH_SOURCES, 0,,$PHP_HASH_CFLAGS)
4548
PHP_INSTALL_HEADERS(ext/hash, $EXT_HASH_HEADERS)
49+
50+
PHP_REQUIRE_CXX()
51+
PHP_ADD_SOURCES(PHP_EXT_DIR(hash), $MURMUR_DIR/PMurHash.cpp $MURMUR_DIR/PMurHash128.cpp hash_murmur.cpp, $PHP_HASH_CXX_FLAGS)
52+
53+
PHP_ADD_BUILD_DIR($ext_builddir/$MURMUR_DIR)

ext/hash/config.w32

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ PHP_HASH = 'yes';
1111
EXTENSION('hash', 'hash.c hash_md.c hash_sha.c hash_ripemd.c hash_haval.c ' +
1212
'hash_tiger.c hash_gost.c hash_snefru.c hash_whirlpool.c ' +
1313
'hash_adler32.c hash_crc32.c hash_joaat.c hash_fnv.c ' +
14-
'hash_sha3.c', false);
14+
'hash_sha3.c hash_murmur.cpp', false);
1515

1616
var hash_sha3_dir = 'ext/hash/sha3/generic' + (X64 ? '64' : '32') + 'lc';
1717

@@ -28,7 +28,14 @@ if (!CHECK_HEADER_ADD_INCLUDE('KeccakHash.h', 'CFLAGS_HASH', hash_sha3_dir)) {
2828

2929
ADD_FLAG('CFLAGS_HASH', '/DKeccakP200_excluded /DKeccakP400_excluded /DKeccakP800_excluded /DZEND_ENABLE_STATIC_TSRMLS_CACHE=1');
3030

31+
var hash_murmur_dir = 'ext/hash/murmur';
32+
if (!CHECK_HEADER_ADD_INCLUDE('PMurHash.h', 'CFLAGS_HASH', hash_murmur_dir)) {
33+
ERROR('Unable to locate murmur headers');
34+
}
35+
ADD_SOURCES(hash_murmur_dir, 'PMurHash.cpp PMurHash128.cpp', 'hash');
36+
3137
PHP_INSTALL_HEADERS('ext/hash/', 'php_hash.h php_hash_md.h php_hash_sha.h ' +
3238
'php_hash_ripemd.h php_hash_haval.h php_hash_tiger.h ' +
3339
'php_hash_gost.h php_hash_snefru.h php_hash_whirlpool.h ' +
34-
'php_hash_adler32.h php_hash_crc32.h php_hash_sha3.h');
40+
'php_hash_adler32.h php_hash_crc32.h php_hash_sha3.h ' +
41+
'php_hash_murmur.h');

ext/hash/hash.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ struct mhash_bc_entry {
5252
int value;
5353
};
5454

55-
#define MHASH_NUM_ALGOS 35
55+
#define MHASH_NUM_ALGOS 38
5656

5757
static struct mhash_bc_entry mhash_to_hash[MHASH_NUM_ALGOS] = {
5858
{"CRC32", "crc32", 0}, /* used by bzip */
@@ -90,6 +90,9 @@ static struct mhash_bc_entry mhash_to_hash[MHASH_NUM_ALGOS] = {
9090
{"FNV1A64", "fnv1a64", 32},
9191
{"JOAAT", "joaat", 33},
9292
{"CRC32C", "crc32c", 34}, /* Castagnoli's CRC, used by iSCSI, SCTP, Btrfs, ext4, etc */
93+
{"MURMUR3A", "murmur3a", 35},
94+
{"MURMUR3C", "murmur3c", 36},
95+
{"MURMUR3F", "murmur3f", 37},
9396
};
9497
#endif
9598

@@ -1586,6 +1589,9 @@ PHP_MINIT_FUNCTION(hash)
15861589
php_hash_register_algo("fnv164", &php_hash_fnv164_ops);
15871590
php_hash_register_algo("fnv1a64", &php_hash_fnv1a64_ops);
15881591
php_hash_register_algo("joaat", &php_hash_joaat_ops);
1592+
php_hash_register_algo("murmur3a", &php_hash_murmur3a_ops);
1593+
php_hash_register_algo("murmur3c", &php_hash_murmur3c_ops);
1594+
php_hash_register_algo("murmur3f", &php_hash_murmur3f_ops);
15891595

15901596
PHP_HASH_HAVAL_REGISTER(3,128);
15911597
PHP_HASH_HAVAL_REGISTER(3,160);

ext/hash/hash_murmur.cpp

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
/*
2+
+----------------------------------------------------------------------+
3+
| Copyright (c) The PHP Group |
4+
+----------------------------------------------------------------------+
5+
| This source file is subject to version 3.01 of the PHP license, |
6+
| that is bundled with this package in the file LICENSE, and is |
7+
| available through the world-wide-web at the following url: |
8+
| http://www.php.net/license/3_01.txt |
9+
| If you did not receive a copy of the PHP license and are unable to |
10+
| obtain it through the world-wide-web, please send a note to |
11+
| license@php.net so we can mail you a copy immediately. |
12+
+----------------------------------------------------------------------+
13+
| Author: Anatol Belski <ab@php.net> |
14+
+----------------------------------------------------------------------+
15+
*/
16+
17+
extern "C" {
18+
#include "php_hash.h"
19+
#include "php_hash_murmur.h"
20+
}
21+
22+
#include "PMurHash.h"
23+
#include "PMurHash128.h"
24+
#define FORCE_INLINE zend_always_inline
25+
#include "endianness.h"
26+
27+
28+
/* Operations table for the 32-bit MurmurHash3 variant ("murmur3a"). */
const php_hash_ops php_hash_murmur3a_ops = {/*{{{*/
	"murmur3a",                                   /* algorithm name */
	(php_hash_init_func_t) PHP_MURMUR3AInit,      /* reset the context */
	(php_hash_update_func_t) PHP_MURMUR3AUpdate,  /* absorb input bytes */
	(php_hash_final_func_t) PHP_MURMUR3AFinal,    /* emit the digest */
	(php_hash_copy_func_t) PHP_MURMUR3ACopy,      /* duplicate a context */
	php_hash_serialize,
	php_hash_unserialize,
	PHP_MURMUR3A_SPEC,
	4,  /* matches the 4 bytes written by PHP_MURMUR3AFinal */
	4,  /* NOTE(review): presumably the block size - confirm against php_hash_ops */
	sizeof(PHP_MURMUR3A_CTX),
	0   /* NOTE(review): presumably the "is cryptographic" flag - confirm */
};/*}}}*/
42+
43+
/*
 * Reset a murmur3a context: zero running hash, zero carry, zero bytes seen.
 * The starting hash is fixed at zero by this function.
 */
PHP_HASH_API void PHP_MURMUR3AInit(PHP_MURMUR3A_CTX *ctx)
{/*{{{*/
	ctx->len = 0;
	ctx->carry = 0;
	ctx->h = 0;
}/*}}}*/
49+
50+
/*
 * Feed `len` bytes starting at `in` into the murmur3a state.
 * The hash and carry words are advanced by PMurHash32_Process(); the total
 * byte count is tracked separately in ctx->len for the final step.
 */
PHP_HASH_API void PHP_MURMUR3AUpdate(PHP_MURMUR3A_CTX *ctx, const unsigned char *in, size_t len)
{/*{{{*/
	PMurHash32_Process(&ctx->h, &ctx->carry, in, len);
	ctx->len += len;
}/*}}}*/
55+
56+
/*
 * Finish the murmur3a computation and write the 4-byte digest.
 * The finalized value is stored back into ctx->h and then serialized
 * most-significant byte first.
 */
PHP_HASH_API void PHP_MURMUR3AFinal(unsigned char digest[4], PHP_MURMUR3A_CTX *ctx)
{/*{{{*/
	unsigned char *out = digest;
	int shift;

	ctx->h = PMurHash32_Result(ctx->h, ctx->carry, ctx->len);

	/* Walk from the top byte (shift 24) down to the lowest byte (shift 0). */
	for (shift = 24; shift >= 0; shift -= 8) {
		*out++ = (unsigned char)((ctx->h >> shift) & 0xff);
	}
}/*}}}*/
65+
66+
/*
 * Duplicate a murmur3a context field by field (hash, carry, byte count).
 * `ops` is not used here. Always returns SUCCESS.
 */
PHP_HASH_API int PHP_MURMUR3ACopy(const php_hash_ops *ops, PHP_MURMUR3A_CTX *orig_context, PHP_MURMUR3A_CTX *copy_context)
{/*{{{*/
	copy_context->len = orig_context->len;
	copy_context->carry = orig_context->carry;
	copy_context->h = orig_context->h;

	return SUCCESS;
}/*}}}*/
73+
74+
/* Operations table for the 128-bit x86 MurmurHash3 variant ("murmur3c"). */
const php_hash_ops php_hash_murmur3c_ops = {/*{{{*/
	"murmur3c",                                   /* algorithm name */
	(php_hash_init_func_t) PHP_MURMUR3CInit,      /* reset the context */
	(php_hash_update_func_t) PHP_MURMUR3CUpdate,  /* absorb input bytes */
	(php_hash_final_func_t) PHP_MURMUR3CFinal,    /* emit the digest */
	(php_hash_copy_func_t) PHP_MURMUR3CCopy,      /* duplicate a context */
	php_hash_serialize,
	php_hash_unserialize,
	PHP_MURMUR3C_SPEC,
	16, /* matches the 16 bytes written by PHP_MURMUR3CFinal */
	4,  /* NOTE(review): presumably the block size - confirm against php_hash_ops */
	sizeof(PHP_MURMUR3C_CTX),
	0   /* NOTE(review): presumably the "is cryptographic" flag - confirm */
};/*}}}*/
88+
89+
/*
 * Reset a murmur3c context: clear the hash state and carry storage and
 * restart the byte counter. The starting hash is therefore all zero.
 */
PHP_HASH_API void PHP_MURMUR3CInit(PHP_MURMUR3C_CTX *ctx)
{/*{{{*/
	ctx->len = 0;
	memset(&ctx->carry, 0, sizeof(ctx->carry));
	memset(&ctx->h, 0, sizeof(ctx->h));
}/*}}}*/
95+
96+
/*
 * Feed `len` bytes starting at `in` into the murmur3c state.
 * PMurHash128x86_Process() advances the hash and carry storage; the total
 * byte count is tracked separately in ctx->len for the final step.
 */
PHP_HASH_API void PHP_MURMUR3CUpdate(PHP_MURMUR3C_CTX *ctx, const unsigned char *in, size_t len)
{/*{{{*/
	PMurHash128x86_Process(ctx->h, ctx->carry, in, len);
	ctx->len += len;
}/*}}}*/
101+
102+
/*
 * Finish the murmur3c computation and write the 16-byte digest.
 *
 * digest: output buffer; exactly 16 bytes are written. The declared bound is
 *         16 to match what the body stores (the parameter is still a plain
 *         pointer as far as callers are concerned).
 * ctx:    context to finalize; it is only read here, the result goes through
 *         a local buffer.
 *
 * The four 32-bit result words are emitted in order, each most-significant
 * byte first.
 */
PHP_HASH_API void PHP_MURMUR3CFinal(unsigned char digest[16], PHP_MURMUR3C_CTX *ctx)
{/*{{{*/
	uint32_t h[4] = {0};
	unsigned int word, byte;

	PMurHash128x86_Result(ctx->h, ctx->carry, ctx->len, h);

	for (word = 0; word < 4; word++) {
		for (byte = 0; byte < 4; byte++) {
			/* byte 0 takes bits 31..24, byte 3 takes bits 7..0 */
			digest[word * 4 + byte] = (unsigned char)((h[word] >> (24 - 8 * byte)) & 0xff);
		}
	}
}/*}}}*/
124+
125+
/*
 * Duplicate a murmur3c context: hash state and carry storage are copied
 * bytewise, the byte counter by assignment. `ops` is not used here.
 * Always returns SUCCESS.
 */
PHP_HASH_API int PHP_MURMUR3CCopy(const php_hash_ops *ops, PHP_MURMUR3C_CTX *orig_context, PHP_MURMUR3C_CTX *copy_context)
{/*{{{*/
	copy_context->len = orig_context->len;
	memcpy(&copy_context->carry, &orig_context->carry, sizeof(orig_context->carry));
	memcpy(&copy_context->h, &orig_context->h, sizeof(orig_context->h));

	return SUCCESS;
}/*}}}*/
132+
133+
/* Operations table for the 128-bit x64 MurmurHash3 variant ("murmur3f"). */
const php_hash_ops php_hash_murmur3f_ops = {/*{{{*/
	"murmur3f",                                   /* algorithm name */
	(php_hash_init_func_t) PHP_MURMUR3FInit,      /* reset the context */
	(php_hash_update_func_t) PHP_MURMUR3FUpdate,  /* absorb input bytes */
	(php_hash_final_func_t) PHP_MURMUR3FFinal,    /* emit the digest */
	(php_hash_copy_func_t) PHP_MURMUR3FCopy,      /* duplicate a context */
	php_hash_serialize,
	php_hash_unserialize,
	PHP_MURMUR3F_SPEC,
	16, /* matches the 16 bytes written by PHP_MURMUR3FFinal */
	8,  /* NOTE(review): presumably the block size - confirm against php_hash_ops */
	sizeof(PHP_MURMUR3F_CTX),
	0   /* NOTE(review): presumably the "is cryptographic" flag - confirm */
};/*}}}*/
147+
148+
/*
 * Reset a murmur3f context: clear the hash state and carry storage and
 * restart the byte counter. The starting hash is therefore all zero.
 */
PHP_HASH_API void PHP_MURMUR3FInit(PHP_MURMUR3F_CTX *ctx)
{/*{{{*/
	ctx->len = 0;
	memset(&ctx->carry, 0, sizeof(ctx->carry));
	memset(&ctx->h, 0, sizeof(ctx->h));
}/*}}}*/
154+
155+
/*
 * Feed `len` bytes starting at `in` into the murmur3f state.
 * PMurHash128x64_Process() advances the hash and carry storage; the total
 * byte count is tracked separately in ctx->len for the final step.
 */
PHP_HASH_API void PHP_MURMUR3FUpdate(PHP_MURMUR3F_CTX *ctx, const unsigned char *in, size_t len)
{/*{{{*/
	PMurHash128x64_Process(ctx->h, ctx->carry, in, len);
	ctx->len += len;
}/*}}}*/
160+
161+
/*
 * Finish the murmur3f computation and write the 16-byte digest.
 *
 * digest: output buffer; exactly 16 bytes are written. The declared bound is
 *         16 to match what the body stores (the parameter is still a plain
 *         pointer as far as callers are concerned).
 * ctx:    context to finalize; it is only read here, the result goes through
 *         a local buffer.
 *
 * The two 64-bit result words are emitted in order, each most-significant
 * byte first.
 */
PHP_HASH_API void PHP_MURMUR3FFinal(unsigned char digest[16], PHP_MURMUR3F_CTX *ctx)
{/*{{{*/
	uint64_t h[2] = {0};
	unsigned int word, byte;

	PMurHash128x64_Result(ctx->h, ctx->carry, ctx->len, h);

	for (word = 0; word < 2; word++) {
		for (byte = 0; byte < 8; byte++) {
			/* byte 0 takes bits 63..56, byte 7 takes bits 7..0 */
			digest[word * 8 + byte] = (unsigned char)((h[word] >> (56 - 8 * byte)) & 0xff);
		}
	}
}/*}}}*/
183+
184+
/*
 * Duplicate a murmur3f context: hash state and carry storage are copied
 * bytewise, the byte counter by assignment. `ops` is not used here.
 * Always returns SUCCESS.
 */
PHP_HASH_API int PHP_MURMUR3FCopy(const php_hash_ops *ops, PHP_MURMUR3F_CTX *orig_context, PHP_MURMUR3F_CTX *copy_context)
{/*{{{*/
	copy_context->len = orig_context->len;
	memcpy(&copy_context->carry, &orig_context->carry, sizeof(orig_context->carry));
	memcpy(&copy_context->h, &orig_context->h, sizeof(orig_context->h));

	return SUCCESS;
}/*}}}*/
191+

0 commit comments

Comments
 (0)