From c5e30e68e881562a374e37765aa06609800a1e7e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 30 Nov 2023 18:55:48 -0800 Subject: [PATCH 1/2] Remove inline_helper.h header file --- pandas/_libs/include/pandas/inline_helper.h | 24 ---------- .../_libs/include/pandas/parser/tokenizer.h | 1 - pandas/_libs/include/pandas/skiplist.h | 29 ++++++------ .../include/pandas/vendored/klib/khash.h | 15 +++--- .../pandas/vendored/klib/khash_python.h | 46 +++++++++---------- 5 files changed, 44 insertions(+), 71 deletions(-) delete mode 100644 pandas/_libs/include/pandas/inline_helper.h diff --git a/pandas/_libs/include/pandas/inline_helper.h b/pandas/_libs/include/pandas/inline_helper.h deleted file mode 100644 index 1e03da1327470..0000000000000 --- a/pandas/_libs/include/pandas/inline_helper.h +++ /dev/null @@ -1,24 +0,0 @@ -/* -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. -*/ - -#pragma once - -#ifndef PANDAS_INLINE -#if defined(__clang__) -#define PANDAS_INLINE static __inline__ __attribute__((__unused__)) -#elif defined(__GNUC__) -#define PANDAS_INLINE static __inline__ -#elif defined(_MSC_VER) -#define PANDAS_INLINE static __inline -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L -#define PANDAS_INLINE static inline -#else -#define PANDAS_INLINE -#endif // __GNUC__ -#endif // PANDAS_INLINE diff --git a/pandas/_libs/include/pandas/parser/tokenizer.h b/pandas/_libs/include/pandas/parser/tokenizer.h index 6a46ad637a401..f7e36a2f05a70 100644 --- a/pandas/_libs/include/pandas/parser/tokenizer.h +++ b/pandas/_libs/include/pandas/parser/tokenizer.h @@ -18,7 +18,6 @@ See LICENSE for the license #define ERROR_OVERFLOW 2 #define ERROR_INVALID_CHARS 3 -#include "pandas/inline_helper.h" #include "pandas/portable.h" #include diff --git a/pandas/_libs/include/pandas/skiplist.h b/pandas/_libs/include/pandas/skiplist.h index d002dba193279..57e304b3a66c0 100644 --- a/pandas/_libs/include/pandas/skiplist.h +++ b/pandas/_libs/include/pandas/skiplist.h @@ -15,13 +15,12 @@ Python recipe (https://rhettinger.wordpress.com/2010/02/06/lost-knowledge/) #pragma once -#include "pandas/inline_helper.h" #include #include #include #include -PANDAS_INLINE float __skiplist_nanf(void) { +static inline float __skiplist_nanf(void) { const union { int __i; float __f; @@ -30,7 +29,7 @@ PANDAS_INLINE float __skiplist_nanf(void) { } #define PANDAS_NAN ((double)__skiplist_nanf()) -PANDAS_INLINE double Log2(double val) { return log(val) / log(2.); } +static inline double Log2(double val) { return log(val) / log(2.); } typedef struct node_t node_t; @@ -51,13 +50,13 @@ typedef struct { int maxlevels; } skiplist_t; -PANDAS_INLINE double urand(void) { +static inline double urand(void) { return ((double)rand() + 1) / ((double)RAND_MAX + 2); } -PANDAS_INLINE int int_min(int a, int b) { return a < b ? a : b; } +static inline int int_min(int a, int b) { return a < b ? a : b; } -PANDAS_INLINE node_t *node_init(double value, int levels) { +static inline node_t *node_init(double value, int levels) { node_t *result; result = (node_t *)malloc(sizeof(node_t)); if (result) { @@ -78,9 +77,9 @@ PANDAS_INLINE node_t *node_init(double value, int levels) { } // do this ourselves -PANDAS_INLINE void node_incref(node_t *node) { ++(node->ref_count); } +static inline void node_incref(node_t *node) { ++(node->ref_count); } -PANDAS_INLINE void node_decref(node_t *node) { --(node->ref_count); } +static inline void node_decref(node_t *node) { --(node->ref_count); } static void node_destroy(node_t *node) { int i; @@ -100,7 +99,7 @@ static void node_destroy(node_t *node) { } } -PANDAS_INLINE void skiplist_destroy(skiplist_t *skp) { +static inline void skiplist_destroy(skiplist_t *skp) { if (skp) { node_destroy(skp->head); free(skp->tmp_steps); @@ -109,7 +108,7 @@ PANDAS_INLINE void skiplist_destroy(skiplist_t *skp) { } } -PANDAS_INLINE skiplist_t *skiplist_init(int expected_size) { +static inline skiplist_t *skiplist_init(int expected_size) { skiplist_t *result; node_t *NIL, *head; int maxlevels, i; @@ -147,7 +146,7 @@ PANDAS_INLINE skiplist_t *skiplist_init(int expected_size) { } // 1 if left < right, 0 if left == right, -1 if left > right -PANDAS_INLINE int _node_cmp(node_t *node, double value) { +static inline int _node_cmp(node_t *node, double value) { if (node->is_nil || node->value > value) { return -1; } else if (node->value < value) { @@ -157,7 +156,7 @@ PANDAS_INLINE int _node_cmp(node_t *node, double value) { } } -PANDAS_INLINE double skiplist_get(skiplist_t *skp, int i, int *ret) { +static inline double skiplist_get(skiplist_t *skp, int i, int *ret) { node_t *node; int level; @@ -181,7 +180,7 @@ PANDAS_INLINE double skiplist_get(skiplist_t *skp, int i, int *ret) { // Returns the lowest rank of all elements with value `value`, as opposed to the // highest rank returned by `skiplist_insert`. -PANDAS_INLINE int skiplist_min_rank(skiplist_t *skp, double value) { +static inline int skiplist_min_rank(skiplist_t *skp, double value) { node_t *node; int level, rank = 0; @@ -199,7 +198,7 @@ PANDAS_INLINE int skiplist_min_rank(skiplist_t *skp, double value) { // Returns the rank of the inserted element. When there are duplicates, // `rank` is the highest of the group, i.e. the 'max' method of // https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rank.html -PANDAS_INLINE int skiplist_insert(skiplist_t *skp, double value) { +static inline int skiplist_insert(skiplist_t *skp, double value) { node_t *node, *prevnode, *newnode, *next_at_level; int *steps_at_level; int size, steps, level, rank = 0; @@ -253,7 +252,7 @@ PANDAS_INLINE int skiplist_insert(skiplist_t *skp, double value) { return rank + 1; } -PANDAS_INLINE int skiplist_remove(skiplist_t *skp, double value) { +static inline int skiplist_remove(skiplist_t *skp, double value) { int level, size; node_t *node, *prevnode, *tmpnode, *next_at_level; node_t **chain; diff --git a/pandas/_libs/include/pandas/vendored/klib/khash.h b/pandas/_libs/include/pandas/vendored/klib/khash.h index 31d12a3b30001..f072106e09596 100644 --- a/pandas/_libs/include/pandas/vendored/klib/khash.h +++ b/pandas/_libs/include/pandas/vendored/klib/khash.h @@ -85,7 +85,6 @@ int main() { #define AC_VERSION_KHASH_H "0.2.6" -#include "pandas/inline_helper.h" #include #include #include @@ -153,7 +152,7 @@ typedef khuint_t khiter_t; // specializations of // https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp -khuint32_t PANDAS_INLINE murmur2_32to32(khuint32_t k) { +static inline khuint32_t murmur2_32to32(khuint32_t k) { const khuint32_t SEED = 0xc70f6907UL; // 'm' and 'r' are mixing constants generated offline. // They're not really 'magic', they just happen to work well. @@ -186,7 +185,7 @@ khuint32_t PANDAS_INLINE murmur2_32to32(khuint32_t k) { // - no performance difference could be measured compared to a possible // x64-version -khuint32_t PANDAS_INLINE murmur2_32_32to32(khuint32_t k1, khuint32_t k2) { +static inline khuint32_t murmur2_32_32to32(khuint32_t k1, khuint32_t k2) { const khuint32_t SEED = 0xc70f6907UL; // 'm' and 'r' are mixing constants generated offline. // They're not really 'magic', they just happen to work well. @@ -220,7 +219,7 @@ khuint32_t PANDAS_INLINE murmur2_32_32to32(khuint32_t k1, khuint32_t k2) { return h; } -khuint32_t PANDAS_INLINE murmur2_64to32(khuint64_t k) { +static inline khuint32_t murmur2_64to32(khuint64_t k) { khuint32_t k1 = (khuint32_t)k; khuint32_t k2 = (khuint32_t)(k >> 32); @@ -445,7 +444,7 @@ static const double __ac_HASH_UPPER = 0.77; #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, \ __hash_equal) \ - KHASH_INIT2(name, PANDAS_INLINE, khkey_t, khval_t, kh_is_map, __hash_func, \ + KHASH_INIT2(name, static inline, khkey_t, khval_t, kh_is_map, __hash_func, \ __hash_equal) /* --- BEGIN OF HASH FUNCTIONS --- */ @@ -465,7 +464,7 @@ static const double __ac_HASH_UPPER = 0.77; @param key The integer [khuint64_t] @return The hash value [khuint_t] */ -PANDAS_INLINE khuint_t kh_int64_hash_func(khuint64_t key) { +static inline khuint_t kh_int64_hash_func(khuint64_t key) { return (khuint_t)((key) >> 33 ^ (key) ^ (key) << 11); } /*! @function @@ -478,7 +477,7 @@ PANDAS_INLINE khuint_t kh_int64_hash_func(khuint64_t key) { @param s Pointer to a null terminated string @return The hash value */ -PANDAS_INLINE khuint_t __ac_X31_hash_string(const char *s) { +static inline khuint_t __ac_X31_hash_string(const char *s) { khuint_t h = *s; if (h) for (++s; *s; ++s) @@ -496,7 +495,7 @@ PANDAS_INLINE khuint_t __ac_X31_hash_string(const char *s) { */ #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) -PANDAS_INLINE khuint_t __ac_Wang_hash(khuint_t key) { +static inline khuint_t __ac_Wang_hash(khuint_t key) { key += ~(key << 15); key ^= (key >> 10); key += (key << 3); diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h index dc16a4ada1716..5a933b45d9e21 100644 --- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h +++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h @@ -83,13 +83,13 @@ void traced_free(void *ptr) { // implementation of dicts, which shines for smaller sizes but is more // predisposed to superlinear running times (see GH 36729 for comparison) -khuint64_t PANDAS_INLINE asuint64(double key) { +static inline khuint64_t asuint64(double key) { khuint64_t val; memcpy(&val, &key, sizeof(double)); return val; } -khuint32_t PANDAS_INLINE asuint32(float key) { +static inline khuint32_t asuint32(float key) { khuint32_t val; memcpy(&val, &key, sizeof(float)); return val; @@ -98,7 +98,7 @@ khuint32_t PANDAS_INLINE asuint32(float key) { #define ZERO_HASH 0 #define NAN_HASH 0 -khuint32_t PANDAS_INLINE kh_float64_hash_func(double val) { +static inline khuint32_t kh_float64_hash_func(double val) { // 0.0 and -0.0 should have the same hash: if (val == 0.0) { return ZERO_HASH; @@ -111,7 +111,7 @@ khuint32_t PANDAS_INLINE kh_float64_hash_func(double val) { return murmur2_64to32(as_int); } -khuint32_t PANDAS_INLINE kh_float32_hash_func(float val) { +static inline khuint32_t kh_float32_hash_func(float val) { // 0.0 and -0.0 should have the same hash: if (val == 0.0f) { return ZERO_HASH; @@ -138,10 +138,10 @@ KHASH_MAP_INIT_FLOAT64(float64, size_t) KHASH_MAP_INIT_FLOAT32(float32, size_t) -khint32_t PANDAS_INLINE kh_complex128_hash_func(khcomplex128_t val) { +static inline khint32_t kh_complex128_hash_func(khcomplex128_t val) { return kh_float64_hash_func(val.real) ^ kh_float64_hash_func(val.imag); } -khint32_t PANDAS_INLINE kh_complex64_hash_func(khcomplex64_t val) { +static inline khint32_t kh_complex64_hash_func(khcomplex64_t val) { return kh_float32_hash_func(val.real) ^ kh_float32_hash_func(val.imag); } @@ -164,7 +164,7 @@ KHASH_MAP_INIT_COMPLEX128(complex128, size_t) #define kh_exist_complex128(h, k) (kh_exist(h, k)) // NaN-floats should be in the same equivalency class, see GH 22119 -int PANDAS_INLINE floatobject_cmp(PyFloatObject *a, PyFloatObject *b) { +static inline int floatobject_cmp(PyFloatObject *a, PyFloatObject *b) { return (Py_IS_NAN(PyFloat_AS_DOUBLE(a)) && Py_IS_NAN(PyFloat_AS_DOUBLE(b))) || (PyFloat_AS_DOUBLE(a) == PyFloat_AS_DOUBLE(b)); } @@ -172,7 +172,7 @@ int PANDAS_INLINE floatobject_cmp(PyFloatObject *a, PyFloatObject *b) { // NaNs should be in the same equivalency class, see GH 41836 // PyObject_RichCompareBool for complexobjects has a different behavior // needs to be replaced -int PANDAS_INLINE complexobject_cmp(PyComplexObject *a, PyComplexObject *b) { +static inline int complexobject_cmp(PyComplexObject *a, PyComplexObject *b) { return (Py_IS_NAN(a->cval.real) && Py_IS_NAN(b->cval.real) && Py_IS_NAN(a->cval.imag) && Py_IS_NAN(b->cval.imag)) || (Py_IS_NAN(a->cval.real) && Py_IS_NAN(b->cval.real) && @@ -182,12 +182,12 @@ int PANDAS_INLINE complexobject_cmp(PyComplexObject *a, PyComplexObject *b) { (a->cval.real == b->cval.real && a->cval.imag == b->cval.imag); } -int PANDAS_INLINE pyobject_cmp(PyObject *a, PyObject *b); +static inline int pyobject_cmp(PyObject *a, PyObject *b); // replacing PyObject_RichCompareBool (NaN!=NaN) with pyobject_cmp (NaN==NaN), // which treats NaNs as equivalent // see GH 41836 -int PANDAS_INLINE tupleobject_cmp(PyTupleObject *a, PyTupleObject *b) { +static inline int tupleobject_cmp(PyTupleObject *a, PyTupleObject *b) { Py_ssize_t i; if (Py_SIZE(a) != Py_SIZE(b)) { @@ -202,7 +202,7 @@ int PANDAS_INLINE tupleobject_cmp(PyTupleObject *a, PyTupleObject *b) { return 1; } -int PANDAS_INLINE pyobject_cmp(PyObject *a, PyObject *b) { +static inline int pyobject_cmp(PyObject *a, PyObject *b) { if (a == b) { return 1; } @@ -230,7 +230,7 @@ int PANDAS_INLINE pyobject_cmp(PyObject *a, PyObject *b) { return result; } -Py_hash_t PANDAS_INLINE _Pandas_HashDouble(double val) { +static inline Py_hash_t _Pandas_HashDouble(double val) { // Since Python3.10, nan is no longer has hash 0 if (Py_IS_NAN(val)) { return 0; @@ -242,14 +242,14 @@ Py_hash_t PANDAS_INLINE _Pandas_HashDouble(double val) { #endif } -Py_hash_t PANDAS_INLINE floatobject_hash(PyFloatObject *key) { +static inline Py_hash_t floatobject_hash(PyFloatObject *key) { return _Pandas_HashDouble(PyFloat_AS_DOUBLE(key)); } #define _PandasHASH_IMAG 1000003UL // replaces _Py_HashDouble with _Pandas_HashDouble -Py_hash_t PANDAS_INLINE complexobject_hash(PyComplexObject *key) { +static inline Py_hash_t complexobject_hash(PyComplexObject *key) { Py_uhash_t realhash = (Py_uhash_t)_Pandas_HashDouble(key->cval.real); Py_uhash_t imaghash = (Py_uhash_t)_Pandas_HashDouble(key->cval.imag); if (realhash == (Py_uhash_t)-1 || imaghash == (Py_uhash_t)-1) { @@ -262,7 +262,7 @@ Py_hash_t PANDAS_INLINE complexobject_hash(PyComplexObject *key) { return (Py_hash_t)combined; } -khuint32_t PANDAS_INLINE kh_python_hash_func(PyObject *key); +static inline khuint32_t kh_python_hash_func(PyObject *key); // we could use any hashing algorithm, this is the original CPython's for tuples @@ -280,7 +280,7 @@ khuint32_t PANDAS_INLINE kh_python_hash_func(PyObject *key); ((x << 13) | (x >> 19)) /* Rotate left 13 bits */ #endif -Py_hash_t PANDAS_INLINE tupleobject_hash(PyTupleObject *key) { +static inline Py_hash_t tupleobject_hash(PyTupleObject *key) { Py_ssize_t i, len = Py_SIZE(key); PyObject **item = key->ob_item; @@ -304,7 +304,7 @@ Py_hash_t PANDAS_INLINE tupleobject_hash(PyTupleObject *key) { return acc; } -khuint32_t PANDAS_INLINE kh_python_hash_func(PyObject *key) { +static inline khuint32_t kh_python_hash_func(PyObject *key) { Py_hash_t hash; // For PyObject_Hash holds: // hash(0.0) == 0 == hash(-0.0) @@ -373,14 +373,14 @@ typedef struct { typedef kh_str_starts_t *p_kh_str_starts_t; -p_kh_str_starts_t PANDAS_INLINE kh_init_str_starts(void) { +static inline p_kh_str_starts_t kh_init_str_starts(void) { kh_str_starts_t *result = (kh_str_starts_t *)KHASH_CALLOC(1, sizeof(kh_str_starts_t)); result->table = kh_init_str(); return result; } -khuint_t PANDAS_INLINE kh_put_str_starts_item(kh_str_starts_t *table, char *key, +static inline khuint_t kh_put_str_starts_item(kh_str_starts_t *table, char *key, int *ret) { khuint_t result = kh_put_str(table->table, key, ret); if (*ret != 0) { @@ -389,7 +389,7 @@ khuint_t PANDAS_INLINE kh_put_str_starts_item(kh_str_starts_t *table, char *key, return result; } -khuint_t PANDAS_INLINE kh_get_str_starts_item(const kh_str_starts_t *table, +static inline khuint_t kh_get_str_starts_item(const kh_str_starts_t *table, const char *key) { unsigned char ch = *key; if (table->starts[ch]) { @@ -399,18 +399,18 @@ khuint_t PANDAS_INLINE kh_get_str_starts_item(const kh_str_starts_t *table, return 0; } -void PANDAS_INLINE kh_destroy_str_starts(kh_str_starts_t *table) { +static inline void kh_destroy_str_starts(kh_str_starts_t *table) { kh_destroy_str(table->table); KHASH_FREE(table); } -void PANDAS_INLINE kh_resize_str_starts(kh_str_starts_t *table, khuint_t val) { +static inline void kh_resize_str_starts(kh_str_starts_t *table, khuint_t val) { kh_resize_str(table->table, val); } // utility function: given the number of elements // returns number of necessary buckets -khuint_t PANDAS_INLINE kh_needed_n_buckets(khuint_t n_elements) { +static inline khuint_t kh_needed_n_buckets(khuint_t n_elements) { khuint_t candidate = n_elements; kroundup32(candidate); khuint_t upper_bound = (khuint_t)(candidate * __ac_HASH_UPPER + 0.5); From 345335100869acca94f98dc703280a47b330be2b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 30 Nov 2023 19:00:37 -0800 Subject: [PATCH 2/2] missing fix --- pandas/_libs/src/parser/tokenizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index c9466c485ae94..f3d976841808c 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -343,7 +343,7 @@ static int push_char(parser_t *self, char c) { return 0; } -int PANDAS_INLINE end_field(parser_t *self) { +static inline int end_field(parser_t *self) { // XXX cruft if (self->words_len >= self->words_cap) { TRACE(("end_field: ERROR!!! self->words_len(%zu) >= "