@@ -242,7 +242,7 @@ int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) {
242
242
}
243
243
244
244
245
- Py_hash_t PANDAS_INLINE _Pandas_HashDouble (double val ){
245
+ Py_hash_t PANDAS_INLINE _Pandas_HashDouble (double val ) {
246
246
// Since Python 3.10, nan no longer has hash 0
247
247
if (Py_IS_NAN (val )) {
248
248
return 0 ;
@@ -255,13 +255,13 @@ Py_hash_t PANDAS_INLINE _Pandas_HashDouble(double val){
255
255
}
256
256
257
257
258
- Py_hash_t PANDAS_INLINE floatobject_hash (PyFloatObject * key ){
258
+ Py_hash_t PANDAS_INLINE floatobject_hash (PyFloatObject * key ) {
259
259
return _Pandas_HashDouble (PyFloat_AS_DOUBLE (key ));
260
260
}
261
261
262
262
263
263
// replaces _Py_HashDouble with _Pandas_HashDouble
264
- Py_hash_t PANDAS_INLINE complexobject_hash (PyComplexObject * key ){
264
+ Py_hash_t PANDAS_INLINE complexobject_hash (PyComplexObject * key ) {
265
265
Py_uhash_t realhash = (Py_uhash_t )_Pandas_HashDouble (key -> cval .real );
266
266
Py_uhash_t imaghash = (Py_uhash_t )_Pandas_HashDouble (key -> cval .imag );
267
267
if (realhash == (Py_uhash_t )- 1 || imaghash == (Py_uhash_t )- 1 ) {
@@ -275,11 +275,52 @@ Py_hash_t PANDAS_INLINE complexobject_hash(PyComplexObject* key){
275
275
}
276
276
277
277
278
- khint32_t PANDAS_INLINE kh_python_hash_func (PyObject * key ){
278
+ khint32_t PANDAS_INLINE kh_python_hash_func (PyObject * key );
279
+
280
// We could use any hashing algorithm; this is CPython's original one for
// tuples.
//
// Two sets of constants are provided: the first is selected when
// SIZEOF_PY_UHASH_T is greater than 4 (rotation amounts 31 + 33 = 64 bits),
// the second otherwise (rotation amounts 13 + 19 = 32 bits).
//
// NOTE: _PandasHASH_XXROTATE evaluates its argument twice, so do not pass an
// expression with side effects. The argument is parenthesized inside the
// expansion so that compound expressions (e.g. `a | b`) rotate as a whole.

#if SIZEOF_PY_UHASH_T > 4
#define _PandasHASH_XXPRIME_1 ((Py_uhash_t)11400714785074694791ULL)
#define _PandasHASH_XXPRIME_2 ((Py_uhash_t)14029467366897019727ULL)
#define _PandasHASH_XXPRIME_5 ((Py_uhash_t)2870177450012600261ULL)
#define _PandasHASH_XXROTATE(x) (((x) << 31) | ((x) >> 33))  /* Rotate left 31 bits */
#else
#define _PandasHASH_XXPRIME_1 ((Py_uhash_t)2654435761UL)
#define _PandasHASH_XXPRIME_2 ((Py_uhash_t)2246822519UL)
#define _PandasHASH_XXPRIME_5 ((Py_uhash_t)374761393UL)
#define _PandasHASH_XXROTATE(x) (((x) << 13) | ((x) >> 19))  /* Rotate left 13 bits */
#endif
293
+
294
+ Py_hash_t PANDAS_INLINE tupleobject_hash (PyTupleObject * key ) {
295
+ Py_ssize_t i , len = Py_SIZE (key );
296
+ PyObject * * item = key -> ob_item ;
297
+
298
+ Py_uhash_t acc = _PandasHASH_XXPRIME_5 ;
299
+ for (i = 0 ; i < len ; i ++ ) {
300
+ Py_uhash_t lane = kh_python_hash_func (item [i ]);
301
+ if (lane == (Py_uhash_t )- 1 ) {
302
+ return -1 ;
303
+ }
304
+ acc += lane * _PandasHASH_XXPRIME_2 ;
305
+ acc = _PandasHASH_XXROTATE (acc );
306
+ acc *= _PandasHASH_XXPRIME_1 ;
307
+ }
308
+
309
+ /* Add input length, mangled to keep the historical value of hash(()). */
310
+ acc += len ^ (_PandasHASH_XXPRIME_5 ^ 3527539UL );
311
+
312
+ if (acc == (Py_uhash_t )- 1 ) {
313
+ return 1546275796 ;
314
+ }
315
+ return acc ;
316
+ }
317
+
318
+
319
+ khint32_t PANDAS_INLINE kh_python_hash_func (PyObject * key ) {
279
320
Py_hash_t hash ;
280
321
// For PyObject_Hash holds:
281
322
// hash(0.0) == 0 == hash(-0.0)
282
- // yet for different nan-object different hash-values
323
+ // yet for different nan-objects different hash-values
283
324
// are possible
284
325
if (PyFloat_CheckExact (key )) {
285
326
// we cannot use kh_float64_hash_func
@@ -293,6 +334,9 @@ khint32_t PANDAS_INLINE kh_python_hash_func(PyObject* key){
293
334
// and kh_complex128_hash_func doesn't respect it
294
335
hash = complexobject_hash ((PyComplexObject * )key );
295
336
}
337
+ else if (PyTuple_CheckExact (key )) {
338
+ hash = tupleobject_hash ((PyTupleObject * )key );
339
+ }
296
340
else {
297
341
hash = PyObject_Hash (key );
298
342
}
0 commit comments