From 5b17668196c0b8c1d3657166ff3352a00951e812 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 27 Aug 2022 14:33:46 +0100 Subject: [PATCH 1/3] remove unused variables and imports --- .pre-commit-config.yaml | 4 ++ pandas/_libs/algos_common_helper.pxi.in | 28 +++++------ pandas/_libs/groupby.pyx | 5 -- pandas/_libs/hashing.pyx | 2 - pandas/_libs/hashtable.pyx | 11 +--- pandas/_libs/hashtable_class_helper.pxi.in | 58 +++++++++++----------- pandas/_libs/index.pyx | 8 --- pandas/_libs/internals.pyx | 2 +- pandas/_libs/interval.pyx | 12 +---- pandas/_libs/lib.pyx | 1 - pandas/_libs/sparse.pyx | 12 ++--- pandas/_libs/tslib.pyx | 3 +- pandas/_libs/tslibs/conversion.pyx | 17 +------ pandas/_libs/tslibs/dtypes.pyx | 2 - pandas/_libs/tslibs/fields.pyx | 3 -- pandas/_libs/tslibs/nattype.pyx | 4 -- pandas/_libs/tslibs/parsing.pyx | 2 +- pandas/_libs/tslibs/period.pyx | 4 +- pandas/_libs/tslibs/strptime.pyx | 4 +- pandas/_libs/tslibs/timedeltas.pyx | 3 +- pandas/_libs/tslibs/timestamps.pyx | 6 +-- pandas/_libs/tslibs/tzconversion.pyx | 1 - pandas/_libs/tslibs/vectorized.pyx | 1 - 23 files changed, 63 insertions(+), 130 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2ca5b5c9b896b..82af83ddf402b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,6 +26,10 @@ repos: hooks: - id: codespell types_or: [python, rst, markdown] +- repo: https://github.com/MarcoGorelli/cython-lint + rev: v0.1.2 + hooks: + - id: cython-lint - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.3.0 hooks: diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index ce2e1ffbb5870..6fdd5b92caabc 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -35,20 +35,20 @@ def ensure_object(object arr): {{py: # name, c_type, dtype -dtypes = [('float64', 'FLOAT64', 'float64'), - # ('float32', 'FLOAT32', 'float32'), # disabling bc unused - ('int8', 'INT8', 'int8'), - ('int16', 'INT16', 'int16'), - ('int32', 'INT32', 'int32'), - ('int64', 'INT64', 'int64'), - ('uint64', 'UINT64', 'uint64'), +dtypes = [('float64', 'NPY_FLOAT64', 'float64'), + # ('float32', 'NPY_FLOAT32', 'float32'), # disabling bc unused + ('int8', 'NPY_INT8', 'int8'), + ('int16', 'NPY_INT16', 'int16'), + ('int32', 'NPY_INT32', 'int32'), + ('int64', 'NPY_INT64', 'int64'), + ('uint64', 'NPY_UINT64', 'uint64'), # Disabling uint and complex dtypes because we do not use them # (and compiling them increases wheel size) (except uint64) - # ('uint8', 'UINT8', 'uint8'), - # ('uint16', 'UINT16', 'uint16'), - # ('uint32', 'UINT32', 'uint32'), - # ('complex64', 'COMPLEX64', 'complex64'), - # ('complex128', 'COMPLEX128', 'complex128') + # ('uint8', 'NPY_UINT8', 'uint8'), + # ('uint16', 'NPY_UINT16', 'uint16'), + # ('uint32', 'NPY_UINT32', 'uint32'), + # ('complex64', 'NPY_COMPLEX64', 'complex64'), + # ('complex128', 'NPY_COMPLEX128', 'complex128') ] def get_dispatch(dtypes): @@ -62,11 +62,11 @@ def get_dispatch(dtypes): def ensure_{{name}}(object arr, copy=True): if util.is_array(arr): - if (arr).descr.type_num == NPY_{{c_type}}: + if (arr).descr.type_num == {{c_type}}: return arr else: # equiv: arr.astype(np.{{dtype}}, copy=copy) - return cnp.PyArray_Cast(arr, cnp.NPY_{{c_type}}) + return cnp.PyArray_Cast(arr, cnp.{{c_type}}) else: return np.array(arr, dtype=np.{{dtype}}) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 563abf949dbbc..8f5dcb9e5377c 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -17,14 +17,10 @@ from numpy cimport ( float32_t, float64_t, int8_t, - int16_t, - int32_t, int64_t, intp_t, ndarray, uint8_t, - uint16_t, - uint32_t, uint64_t, ) from numpy.math cimport NAN @@ -38,7 +34,6 @@ from pandas._libs.algos cimport ( ) from pandas._libs.algos import ( - ensure_platform_int, groupsort_indexer, rank_1d, take_2d_axis1_float64_float64, diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 9ea0fa73cbc9f..64f753f13a624 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -13,7 +13,6 @@ from numpy cimport ( import_array, ndarray, uint8_t, - uint32_t, uint64_t, ) @@ -158,7 +157,6 @@ cdef uint64_t low_level_siphash(uint8_t* data, size_t datalen, cdef int i cdef uint8_t* end = data + datalen - (datalen % sizeof(uint64_t)) cdef int left = datalen & 7 - cdef int left_byte cdef int cROUNDS = 2 cdef int dROUNDS = 4 diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index bbc17c4cb5415..7aaeee043c72b 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -15,13 +15,7 @@ from libc.stdlib cimport ( import numpy as np cimport numpy as cnp -from numpy cimport ( - float64_t, - ndarray, - uint8_t, - uint32_t, -) -from numpy.math cimport NAN +from numpy cimport ndarray cnp.import_array() @@ -37,9 +31,6 @@ from pandas._libs.khash cimport ( kh_needed_n_buckets, kh_python_hash_equal, kh_python_hash_func, - kh_str_t, - khcomplex64_t, - khcomplex128_t, khiter_t, ) from pandas._libs.missing cimport checknull diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 54260a9a90964..9d2a8fd14d225 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -26,21 +26,21 @@ cdef kh{{name}}_t to_kh{{name}}_t({{name}}_t val) nogil: # name -c_types = ['khcomplex128_t', - 'khcomplex64_t', - 'float64_t', - 'float32_t', - 'int64_t', - 'int32_t', - 'int16_t', - 'int8_t', - 'uint64_t', - 'uint32_t', - 'uint16_t', - 'uint8_t'] +c_types = [('khcomplex128_t', 'are_equivalent_khcomplex128_t'), + ('khcomplex64_t', 'are_equivalent_khcomplex64_t'), + ('float64_t', 'are_equivalent_float64_t'), + ('float32_t', 'are_equivalent_float32_t'), + ('int64_t', 'are_equivalent_int64_t'), + ('int32_t', 'are_equivalent_int32_t'), + ('int16_t', 'are_equivalent_int16_t'), + ('int8_t', 'are_equivalent_int8_t'), + ('uint64_t', 'are_equivalent_uint64_t'), + ('uint32_t', 'are_equivalent_uint32_t'), + ('uint16_t', 'are_equivalent_uint16_t'), + ('uint8_t', 'are_equivalent_uint8_t')] }} -{{for c_type in c_types}} +{{for c_type, are_equivalent_c_type in c_types}} cdef bint is_nan_{{c_type}}({{c_type}} val) nogil: {{if c_type in {'khcomplex128_t', 'khcomplex64_t'} }} @@ -55,7 +55,7 @@ cdef bint is_nan_{{c_type}}({{c_type}} val) nogil: {{if c_type in {'khcomplex128_t', 'khcomplex64_t', 'float64_t', 'float32_t'} }} # are_equivalent_{{c_type}} is cimported via khash.pxd {{else}} -cdef bint are_equivalent_{{c_type}}({{c_type}} val1, {{c_type}} val2) nogil: +cdef bint {{are_equivalent_c_type}}({{c_type}} val1, {{c_type}} val2) nogil: return val1 == val2 {{endif}} @@ -375,24 +375,24 @@ cdef class HashTable: {{py: -# name, dtype, c_type, to_c_type -dtypes = [('Complex128', 'complex128', 'khcomplex128_t', 'to_khcomplex128_t'), - ('Float64', 'float64', 'float64_t', ''), - ('UInt64', 'uint64', 'uint64_t', ''), - ('Int64', 'int64', 'int64_t', ''), - ('Complex64', 'complex64', 'khcomplex64_t', 'to_khcomplex64_t'), - ('Float32', 'float32', 'float32_t', ''), - ('UInt32', 'uint32', 'uint32_t', ''), - ('Int32', 'int32', 'int32_t', ''), - ('UInt16', 'uint16', 'uint16_t', ''), - ('Int16', 'int16', 'int16_t', ''), - ('UInt8', 'uint8', 'uint8_t', ''), - ('Int8', 'int8', 'int8_t', '')] +# name, dtype, c_type, are_equivalent_c_type, to_c_type +dtypes = [('Complex128', 'complex128', 'khcomplex128_t', 'are_equivalent_khcomplex128_t', 'to_khcomplex128_t'), + ('Float64', 'float64', 'float64_t', 'are_equivalent_float64_t', ''), + ('UInt64', 'uint64', 'uint64_t', 'are_equivalent_uint64_t', ''), + ('Int64', 'int64', 'int64_t', 'are_equivalent_int64_t', ''), + ('Complex64', 'complex64', 'khcomplex64_t', 'are_equivalent_khcomplex64_t', 'to_khcomplex64_t'), + ('Float32', 'float32', 'float32_t', 'are_equivalent_float32_t', ''), + ('UInt32', 'uint32', 'uint32_t', 'are_equivalent_uint32_t', ''), + ('Int32', 'int32', 'int32_t', 'are_equivalent_int32_t', ''), + ('UInt16', 'uint16', 'uint16_t', 'are_equivalent_uint16_t', ''), + ('Int16', 'int16', 'int16_t', 'are_equivalent_int16_t', ''), + ('UInt8', 'uint8', 'uint8_t', 'are_equivalent_uint8_t', ''), + ('Int8', 'int8', 'int8_t', 'are_equivalent_int8_t', '')] }} -{{for name, dtype, c_type, to_c_type in dtypes}} +{{for name, dtype, c_type, are_equivalent_c_type, to_c_type in dtypes}} cdef class {{name}}HashTable(HashTable): @@ -613,7 +613,7 @@ cdef class {{name}}HashTable(HashTable): continue elif ignore_na and ( is_nan_{{c_type}}(val) or - (use_na_value and are_equivalent_{{c_type}}(val, na_value2)) + (use_na_value and {{are_equivalent_c_type}}(val, na_value2)) ): # if missing values do not count as unique values (i.e. if # ignore_na is True), skip the hashtable entry for them, diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 0cf7c4d45c634..ebb23b77c90b4 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -4,17 +4,10 @@ import numpy as np cimport numpy as cnp from numpy cimport ( - float32_t, - float64_t, - int8_t, - int16_t, - int32_t, int64_t, intp_t, ndarray, uint8_t, - uint16_t, - uint32_t, uint64_t, ) @@ -35,7 +28,6 @@ from pandas._libs import ( from pandas._libs.lib cimport eq_NA_compat from pandas._libs.missing cimport ( - C_NA as NA, checknull, is_matching_na, ) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 94ae4a021da4d..1a98633908a49 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -226,7 +226,7 @@ cdef class BlockPlacement: """ cdef: slice nv, s = self._ensure_has_slice() - Py_ssize_t other_int, start, stop, step + Py_ssize_t start, stop, step ndarray[intp_t, ndim=1] newarr if s is not None: diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 67c92a0f5df23..f4a25e9c80b8e 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -13,15 +13,7 @@ from cpython.datetime cimport ( import_datetime() cimport cython -from cpython.object cimport ( - Py_EQ, - Py_GE, - Py_GT, - Py_LE, - Py_LT, - Py_NE, - PyObject_RichCompare, -) +from cpython.object cimport PyObject_RichCompare from cython cimport Py_ssize_t import numpy as np @@ -31,9 +23,7 @@ from numpy cimport ( NPY_QUICKSORT, PyArray_ArgSort, PyArray_Take, - float32_t, float64_t, - int32_t, int64_t, ndarray, uint64_t, diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 65677bbdb0ea9..ec7c3d61566dc 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -47,7 +47,6 @@ from numpy cimport ( PyArray_IterNew, complex128_t, flatiter, - float32_t, float64_t, int64_t, intp_t, diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 6c10b394b91aa..031417fa50be0 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -3,10 +3,8 @@ import numpy as np cimport numpy as cnp from numpy cimport ( - float32_t, float64_t, int8_t, - int16_t, int32_t, int64_t, ndarray, @@ -127,7 +125,7 @@ cdef class IntIndex(SparseIndex): cpdef IntIndex intersect(self, SparseIndex y_): cdef: - Py_ssize_t out_length, xi, yi = 0, result_indexer = 0 + Py_ssize_t xi, yi = 0, result_indexer = 0 int32_t xind ndarray[int32_t, ndim=1] xindices, yindices, new_indices IntIndex y @@ -205,7 +203,7 @@ cdef class IntIndex(SparseIndex): Vectorized lookup, returns ndarray[int32_t] """ cdef: - Py_ssize_t n, i, ind_val + Py_ssize_t n ndarray[int32_t, ndim=1] inds ndarray[uint8_t, ndim=1, cast=True] mask ndarray[int32_t, ndim=1] masked @@ -232,7 +230,7 @@ cdef class IntIndex(SparseIndex): cpdef get_blocks(ndarray[int32_t, ndim=1] indices): cdef: - Py_ssize_t init_len, i, npoints, result_indexer = 0 + Py_ssize_t i, npoints, result_indexer = 0 int32_t block, length = 1, cur, prev ndarray[int32_t, ndim=1] locs, lens @@ -606,7 +604,7 @@ cdef class BlockUnion(BlockMerge): cdef: ndarray[int32_t, ndim=1] xstart, xend, ystart ndarray[int32_t, ndim=1] yend, out_bloc, out_blen - int32_t nstart, nend, diff + int32_t nstart, nend Py_ssize_t max_len, result_indexer = 0 xstart = self.xstart @@ -659,7 +657,7 @@ cdef class BlockUnion(BlockMerge): """ cdef: ndarray[int32_t, ndim=1] xstart, xend, ystart, yend - int32_t xi, yi, xnblocks, ynblocks, nend + int32_t xi, yi, ynblocks, nend if mode != 0 and mode != 1: raise Exception('Mode must be 0 or 1') diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 598e6b552e49b..ec48e54d6d5b5 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -474,7 +474,6 @@ cpdef array_to_datetime( Py_ssize_t i, n = len(values) object val, tz ndarray[int64_t] iresult - ndarray[object] oresult npy_datetimestruct dts NPY_DATETIMEUNIT out_bestunit bint utc_convert = bool(utc) @@ -489,7 +488,7 @@ cpdef array_to_datetime( _TSObject _ts int64_t value int out_local = 0, out_tzoffset = 0 - float offset_seconds, tz_offset + float tz_offset set out_tzoffset_vals = set() bint string_to_dts_failed datetime py_dt diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index b25095ead790b..b972ed2e3b31f 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -1,7 +1,4 @@ import inspect - -cimport cython - import warnings import numpy as np @@ -9,12 +6,9 @@ import numpy as np from pandas.util._exceptions import find_stack_level cimport numpy as cnp -from cpython.object cimport PyObject from numpy cimport ( int32_t, int64_t, - intp_t, - ndarray, ) cnp.import_array() @@ -42,33 +36,25 @@ from pandas._libs.tslibs.dtypes cimport ( from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, - astype_overflowsafe, check_dts_bounds, dtstruct_to_dt64, get_datetime64_unit, get_datetime64_value, get_implementation_bounds, - get_unit_from_dtype, npy_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, - pydatetime_to_dt64, pydatetime_to_dtstruct, string_to_dts, ) -from pandas._libs.tslibs.np_datetime import ( - OutOfBoundsDatetime, - OutOfBoundsTimedelta, -) +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.timezones cimport ( get_utcoffset, is_utc, maybe_get_tz, - tz_compare, - utc_pytz as UTC, ) from pandas._libs.tslibs.util cimport ( is_datetime64_object, @@ -82,7 +68,6 @@ from pandas._libs.tslibs.nattype cimport ( NPY_NAT, c_NaT as NaT, c_nat_strings as nat_strings, - checknull_with_nat, ) from pandas._libs.tslibs.tzconversion cimport ( Localizer, diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index c09ac2a686d5c..2f847640d606e 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -1,7 +1,5 @@ # period frequency constants corresponding to scikits timeseries # originals -cimport cython - from enum import Enum from pandas._libs.tslibs.np_datetime cimport ( diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 71a0f2727445f..3c7406d231241 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -37,15 +37,12 @@ from pandas._libs.tslibs.ccalendar cimport ( get_iso_calendar, get_lastbday, get_week_of_year, - is_leapyear, iso_calendar_t, - month_offset, ) from pandas._libs.tslibs.nattype cimport NPY_NAT from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, - get_unit_from_dtype, npy_datetimestruct, pandas_datetime_to_datetimestruct, pandas_timedelta_to_timedeltastruct, diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 909541d24121e..55c5e478868cb 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -15,10 +15,6 @@ from cpython.datetime cimport ( import_datetime() from cpython.object cimport ( Py_EQ, - Py_GE, - Py_GT, - Py_LE, - Py_LT, Py_NE, PyObject_RichCompare, ) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index b442e32071011..9d0479ec8dbf1 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -503,7 +503,7 @@ cdef inline object _parse_dateabbr_string(object date_string, datetime default, cdef: object ret # year initialized to prevent compiler warnings - int year = -1, quarter = -1, month, mnum + int year = -1, quarter = -1, month Py_ssize_t date_len # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1 diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index d2d4838bfafc0..bf2a27fe986ab 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -25,7 +25,6 @@ cimport cython from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, - PyDelta_Check, datetime, import_datetime, ) @@ -50,7 +49,6 @@ from pandas._libs.missing cimport C_NA from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_D, - NPY_FR_us, astype_overflowsafe, check_dts_bounds, get_timedelta64_value, @@ -780,7 +778,7 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) nogil: """ cdef: int64_t unix_date - int freq_group, fmonth, mdiff + int freq_group, fmonth NPY_DATETIMEUNIT unit freq_group = get_freq_group(freq) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 23816e200b788..2ae1ecc308902 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -365,10 +365,10 @@ FUNCTIONS: """ from _strptime import ( - LocaleTime, TimeRE as _TimeRE, _getlang, ) +from _strptime import LocaleTime # no-cython-lint class TimeRE(_TimeRE): @@ -507,7 +507,7 @@ cdef tzinfo parse_timezone_directive(str z): """ cdef: - int gmtoff_fraction, hours, minutes, seconds, pad_number, microseconds + int hours, minutes, seconds, pad_number, microseconds int total_minutes object gmtoff_remainder, gmtoff_remainder_padding diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 4fc0b49e6f5a3..c9e997ffb405c 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -51,7 +51,6 @@ from pandas._libs.tslibs.np_datetime cimport ( cmp_dtstructs, cmp_scalar, convert_reso, - get_conversion_factor, get_datetime64_unit, get_timedelta64_value, get_unit_from_dtype, @@ -1548,7 +1547,7 @@ cdef class _Timedelta(timedelta): @cython.cdivision(False) cdef _Timedelta _as_reso(self, NPY_DATETIMEUNIT reso, bint round_ok=True): cdef: - int64_t value, mult, div, mod + int64_t value if reso == self._reso: return self diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 2655c25ed0893..07c6e32028942 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -15,7 +15,6 @@ import numpy as np cimport numpy as cnp from numpy cimport ( - int8_t, int64_t, ndarray, uint8_t, @@ -65,7 +64,6 @@ from pandas._libs.tslibs.dtypes cimport ( from pandas._libs.tslibs.util cimport ( is_array, is_datetime64_object, - is_float_object, is_integer_object, is_timedelta64_object, ) @@ -87,7 +85,6 @@ from pandas._libs.tslibs.np_datetime cimport ( cmp_dtstructs, cmp_scalar, convert_reso, - get_conversion_factor, get_datetime64_unit, get_datetime64_value, get_unit_from_dtype, @@ -121,7 +118,6 @@ from pandas._libs.tslibs.timezones cimport ( is_utc, maybe_get_tz, treat_tz_as_pytz, - tz_compare, utc_pytz as UTC, ) from pandas._libs.tslibs.tzconversion cimport ( @@ -1109,7 +1105,7 @@ cdef class _Timestamp(ABCTimestamp): @cython.cdivision(False) cdef _Timestamp _as_reso(self, NPY_DATETIMEUNIT reso, bint round_ok=True): cdef: - int64_t value, mult, div, mod + int64_t value if reso == self._reso: return self diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 4487136aa7fb8..28ebce9724da9 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -43,7 +43,6 @@ from pandas._libs.tslibs.timezones cimport ( is_tzlocal, is_utc, is_zoneinfo, - utc_pytz, ) diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index b63b4cf1df66b..c1784c53a7857 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -11,7 +11,6 @@ import numpy as np cimport numpy as cnp from numpy cimport ( int64_t, - intp_t, ndarray, ) From d7a3e9f8112826565982ef0aa38e545b7c55dc64 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 29 Aug 2022 21:26:06 +0100 Subject: [PATCH 2/3] wip --- .pre-commit-config.yaml | 2 +- pandas/_libs/algos.pyx | 6 --- pandas/_libs/algos_common_helper.pxi.in | 28 +++++------ pandas/_libs/hashtable_class_helper.pxi.in | 58 +++++++++++----------- 4 files changed, 44 insertions(+), 50 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 82af83ddf402b..9fd71af3e8ceb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,7 +27,7 @@ repos: - id: codespell types_or: [python, rst, markdown] - repo: https://github.com/MarcoGorelli/cython-lint - rev: v0.1.2 + rev: v0.1.3 hooks: - id: cython-lint - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index c05d6a300ccf0..96c47471aaf90 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -14,18 +14,12 @@ import numpy as np cimport numpy as cnp from numpy cimport ( - NPY_COMPLEX64, - NPY_COMPLEX128, - NPY_FLOAT32, NPY_FLOAT64, NPY_INT8, NPY_INT16, NPY_INT32, NPY_INT64, NPY_OBJECT, - NPY_UINT8, - NPY_UINT16, - NPY_UINT32, NPY_UINT64, float32_t, float64_t, diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 6fdd5b92caabc..ce2e1ffbb5870 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -35,20 +35,20 @@ def ensure_object(object arr): {{py: # name, c_type, dtype -dtypes = [('float64', 'NPY_FLOAT64', 'float64'), - # ('float32', 'NPY_FLOAT32', 'float32'), # disabling bc unused - ('int8', 'NPY_INT8', 'int8'), - ('int16', 'NPY_INT16', 'int16'), - ('int32', 'NPY_INT32', 'int32'), - ('int64', 'NPY_INT64', 'int64'), - ('uint64', 'NPY_UINT64', 'uint64'), +dtypes = [('float64', 'FLOAT64', 'float64'), + # ('float32', 'FLOAT32', 'float32'), # disabling bc unused + ('int8', 'INT8', 'int8'), + ('int16', 'INT16', 'int16'), + ('int32', 'INT32', 'int32'), + ('int64', 'INT64', 'int64'), + ('uint64', 'UINT64', 'uint64'), # Disabling uint and complex dtypes because we do not use them # (and compiling them increases wheel size) (except uint64) - # ('uint8', 'NPY_UINT8', 'uint8'), - # ('uint16', 'NPY_UINT16', 'uint16'), - # ('uint32', 'NPY_UINT32', 'uint32'), - # ('complex64', 'NPY_COMPLEX64', 'complex64'), - # ('complex128', 'NPY_COMPLEX128', 'complex128') + # ('uint8', 'UINT8', 'uint8'), + # ('uint16', 'UINT16', 'uint16'), + # ('uint32', 'UINT32', 'uint32'), + # ('complex64', 'COMPLEX64', 'complex64'), + # ('complex128', 'COMPLEX128', 'complex128') ] def get_dispatch(dtypes): @@ -62,11 +62,11 @@ def get_dispatch(dtypes): def ensure_{{name}}(object arr, copy=True): if util.is_array(arr): - if (arr).descr.type_num == {{c_type}}: + if (arr).descr.type_num == NPY_{{c_type}}: return arr else: # equiv: arr.astype(np.{{dtype}}, copy=copy) - return cnp.PyArray_Cast(arr, cnp.{{c_type}}) + return cnp.PyArray_Cast(arr, cnp.NPY_{{c_type}}) else: return np.array(arr, dtype=np.{{dtype}}) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 9d2a8fd14d225..54260a9a90964 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -26,21 +26,21 @@ cdef kh{{name}}_t to_kh{{name}}_t({{name}}_t val) nogil: # name -c_types = [('khcomplex128_t', 'are_equivalent_khcomplex128_t'), - ('khcomplex64_t', 'are_equivalent_khcomplex64_t'), - ('float64_t', 'are_equivalent_float64_t'), - ('float32_t', 'are_equivalent_float32_t'), - ('int64_t', 'are_equivalent_int64_t'), - ('int32_t', 'are_equivalent_int32_t'), - ('int16_t', 'are_equivalent_int16_t'), - ('int8_t', 'are_equivalent_int8_t'), - ('uint64_t', 'are_equivalent_uint64_t'), - ('uint32_t', 'are_equivalent_uint32_t'), - ('uint16_t', 'are_equivalent_uint16_t'), - ('uint8_t', 'are_equivalent_uint8_t')] +c_types = ['khcomplex128_t', + 'khcomplex64_t', + 'float64_t', + 'float32_t', + 'int64_t', + 'int32_t', + 'int16_t', + 'int8_t', + 'uint64_t', + 'uint32_t', + 'uint16_t', + 'uint8_t'] }} -{{for c_type, are_equivalent_c_type in c_types}} +{{for c_type in c_types}} cdef bint is_nan_{{c_type}}({{c_type}} val) nogil: {{if c_type in {'khcomplex128_t', 'khcomplex64_t'} }} @@ -55,7 +55,7 @@ cdef bint is_nan_{{c_type}}({{c_type}} val) nogil: {{if c_type in {'khcomplex128_t', 'khcomplex64_t', 'float64_t', 'float32_t'} }} # are_equivalent_{{c_type}} is cimported via khash.pxd {{else}} -cdef bint {{are_equivalent_c_type}}({{c_type}} val1, {{c_type}} val2) nogil: +cdef bint are_equivalent_{{c_type}}({{c_type}} val1, {{c_type}} val2) nogil: return val1 == val2 {{endif}} @@ -375,24 +375,24 @@ cdef class HashTable: {{py: -# name, dtype, c_type, are_equivalent_c_type, to_c_type -dtypes = [('Complex128', 'complex128', 'khcomplex128_t', 'are_equivalent_khcomplex128_t', 'to_khcomplex128_t'), - ('Float64', 'float64', 'float64_t', 'are_equivalent_float64_t', ''), - ('UInt64', 'uint64', 'uint64_t', 'are_equivalent_uint64_t', ''), - ('Int64', 'int64', 'int64_t', 'are_equivalent_int64_t', ''), - ('Complex64', 'complex64', 'khcomplex64_t', 'are_equivalent_khcomplex64_t', 'to_khcomplex64_t'), - ('Float32', 'float32', 'float32_t', 'are_equivalent_float32_t', ''), - ('UInt32', 'uint32', 'uint32_t', 'are_equivalent_uint32_t', ''), - ('Int32', 'int32', 'int32_t', 'are_equivalent_int32_t', ''), - ('UInt16', 'uint16', 'uint16_t', 'are_equivalent_uint16_t', ''), - ('Int16', 'int16', 'int16_t', 'are_equivalent_int16_t', ''), - ('UInt8', 'uint8', 'uint8_t', 'are_equivalent_uint8_t', ''), - ('Int8', 'int8', 'int8_t', 'are_equivalent_int8_t', '')] +# name, dtype, c_type, to_c_type +dtypes = [('Complex128', 'complex128', 'khcomplex128_t', 'to_khcomplex128_t'), + ('Float64', 'float64', 'float64_t', ''), + ('UInt64', 'uint64', 'uint64_t', ''), + ('Int64', 'int64', 'int64_t', ''), + ('Complex64', 'complex64', 'khcomplex64_t', 'to_khcomplex64_t'), + ('Float32', 'float32', 'float32_t', ''), + ('UInt32', 'uint32', 'uint32_t', ''), + ('Int32', 'int32', 'int32_t', ''), + ('UInt16', 'uint16', 'uint16_t', ''), + ('Int16', 'int16', 'int16_t', ''), + ('UInt8', 'uint8', 'uint8_t', ''), + ('Int8', 'int8', 'int8_t', '')] }} -{{for name, dtype, c_type, are_equivalent_c_type, to_c_type in dtypes}} +{{for name, dtype, c_type, to_c_type in dtypes}} cdef class {{name}}HashTable(HashTable): @@ -613,7 +613,7 @@ cdef class {{name}}HashTable(HashTable): continue elif ignore_na and ( is_nan_{{c_type}}(val) or - (use_na_value and {{are_equivalent_c_type}}(val, na_value2)) + (use_na_value and are_equivalent_{{c_type}}(val, na_value2)) ): # if missing values do not count as unique values (i.e. if # ignore_na is True), skip the hashtable entry for them, From 1c65df1dd2242955276cadde28a7c05dbca1983f Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 3 Sep 2022 12:36:41 +0100 Subject: [PATCH 3/3] bump to 0.1.4 --- .pre-commit-config.yaml | 2 +- pandas/_libs/interval.pyx | 1 - pandas/_libs/tslib.pyx | 2 -- pandas/_libs/tslibs/offsets.pyx | 1 - pandas/_libs/tslibs/strptime.pyx | 5 ----- 5 files changed, 1 insertion(+), 10 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9fd71af3e8ceb..0cd4e86cb24fa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,7 +27,7 @@ repos: - id: codespell types_or: [python, rst, markdown] - repo: https://github.com/MarcoGorelli/cython-lint - rev: v0.1.3 + rev: v0.1.4 hooks: - id: cython-lint - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index f4a25e9c80b8e..7ed635718e674 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -1,4 +1,3 @@ -import inspect import numbers from operator import ( le, diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index ec48e54d6d5b5..4d071793b3935 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -478,7 +478,6 @@ cpdef array_to_datetime( NPY_DATETIMEUNIT out_bestunit bint utc_convert = bool(utc) bint seen_integer = False - bint seen_string = False bint seen_datetime = False bint seen_datetime_offset = False bint is_raise = errors=='raise' @@ -571,7 +570,6 @@ cpdef array_to_datetime( elif isinstance(val, str): # string - seen_string = True if type(val) is not str: # GH#32264 np.str_ object val = str(val) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index d799770a57be2..565c887d1f40c 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1,5 +1,4 @@ import inspect -import operator import re import time import warnings diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 2ae1ecc308902..f7eb59828b993 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -1,10 +1,5 @@ """Strptime-related classes and functions. """ -import calendar -import locale -import re -import time - from cpython.datetime cimport ( date, tzinfo,