From 474d327c1e0a9ad1dc8c8b689a131180b31d4248 Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 22 May 2021 12:20:04 +0100 Subject: [PATCH 1/7] CLN: various related to numeric indexes --- pandas/_testing/__init__.py | 31 +++++++++++++++++++++++++++---- pandas/_testing/asserters.py | 2 +- pandas/core/indexes/numeric.py | 7 +++++-- pandas/tests/api/test_api.py | 2 +- pandas/tests/base/test_unique.py | 8 ++++---- 5 files changed, 38 insertions(+), 12 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index aaf58f1fcb150..0a904839afbc1 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -33,6 +33,7 @@ is_period_dtype, is_sequence, is_timedelta64_dtype, + pandas_dtype, ) import pandas as pd @@ -41,11 +42,14 @@ CategoricalIndex, DataFrame, DatetimeIndex, + Float64Index, Index, + Int64Index, IntervalIndex, MultiIndex, RangeIndex, Series, + UInt64Index, bdate_range, ) from pandas._testing._io import ( # noqa:F401 @@ -292,12 +296,31 @@ def makeBoolIndex(k=10, name=None): return Index([False, True] + [False] * (k - 2), name=name) +def makeNumericIndex(k=10, name=None, *, dtype): + dtype = pandas_dtype(dtype) + + if dtype.kind == "i": + values = list(range(k)) + elif dtype.kind == "u": + start_num = 2 ** (dtype.itemsize * 8 - 1) + values = [start_num + i for i in range(k)] + elif dtype.kind == "f": + values = sorted(np.random.random_sample(k)) - np.random.random_sample(1) + values = values * (10 ** np.random.randint(0, 9)) + else: + raise NotImplementedError(f"wrong dtype {dtype}") + + return Index(values, dtype=dtype, name=name) + + def makeIntIndex(k=10, name=None): - return Index(list(range(k)), name=name) + base_idx = makeNumericIndex(k, dtype="int64") + return Int64Index(base_idx) def makeUIntIndex(k=10, name=None): - return Index([2 ** 63 + i for i in range(k)], name=name) + base_idx = makeNumericIndex(k, name=name, dtype="uint64") + return UInt64Index(base_idx) def makeRangeIndex(k=10, name=None, **kwargs): @@ -305,8 +328,8 @@ def makeRangeIndex(k=10, name=None, **kwargs): def makeFloatIndex(k=10, name=None): - values = sorted(np.random.random_sample(k)) - np.random.random_sample(1) - return Index(values * (10 ** np.random.randint(0, 9)), name=name) + base_idx = makeNumericIndex(k, name=name, dtype="float64") + return Float64Index(base_idx) def makeDateIndex(k: int = 10, freq="B", name=None, **kwargs) -> DatetimeIndex: diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 2d695458e32e6..96d010b487a79 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -308,7 +308,7 @@ def assert_index_equal( """ __tracebackhide__ = True - def _check_types(left, right, obj="Index"): + def _check_types(left, right, obj="Index") -> None: if not exact: return diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index e6526bd0eaf2f..cb52549521042 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -106,7 +106,7 @@ def _can_hold_na(self) -> bool: else: return False - @cache_readonly + @property def _engine_type(self): return { np.int8: libindex.Int8Engine, @@ -228,7 +228,10 @@ def astype(self, dtype, copy=True): @doc(Index._should_fallback_to_positional) def _should_fallback_to_positional(self) -> bool: - return False + if is_float_dtype(self.dtype): + return False + else: + return super()._should_fallback_to_positional() @doc(Index._convert_slice_indexer) def _convert_slice_indexer(self, key: slice, kind: str): diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index c36552f59da71..8b7070e945439 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -214,7 +214,7 @@ def test_api(self): + self.funcs_to + self.private_modules ) - self.check(pd, checkthese, self.ignored) + self.check(namespace=pd, expected=checkthese, ignored=self.ignored) def test_depr(self): deprecated_list = ( diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index 26e785a2796b1..cabe766a4e9eb 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -23,12 +23,12 @@ def test_unique(index_or_series_obj): if isinstance(obj, pd.MultiIndex): expected = pd.MultiIndex.from_tuples(unique_values) expected.names = obj.names - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected, exact=True) elif isinstance(obj, pd.Index): expected = pd.Index(unique_values, dtype=obj.dtype) if is_datetime64tz_dtype(obj.dtype): expected = expected.normalize() - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected, exact=True) else: expected = np.array(unique_values) tm.assert_numpy_array_equal(result, expected) @@ -67,7 +67,7 @@ def test_unique_null(null_obj, index_or_series_obj): if is_datetime64tz_dtype(obj.dtype): result = result.normalize() expected = expected.normalize() - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected, exact=True) else: expected = np.array(unique_values, dtype=obj.dtype) tm.assert_numpy_array_equal(result, expected) @@ -118,7 +118,7 @@ def test_unique_bad_unicode(idx_or_series_w_bad_unicode): if isinstance(obj, pd.Index): expected = pd.Index(["\ud83d"], dtype=object) - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected, exact=True) else: expected = np.array(["\ud83d"], dtype=object) tm.assert_numpy_array_equal(result, expected) From ec0e5e2d869c9026c3f6df294fb5c881d3cde31c Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 22 May 2021 12:26:47 +0100 Subject: [PATCH 2/7] fix _should_fallback_to_positional --- pandas/core/indexes/numeric.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index cb52549521042..5e1badf2c490b 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -228,10 +228,7 @@ def astype(self, dtype, copy=True): @doc(Index._should_fallback_to_positional) def _should_fallback_to_positional(self) -> bool: - if is_float_dtype(self.dtype): - return False - else: - return super()._should_fallback_to_positional() + return False @doc(Index._convert_slice_indexer) def _convert_slice_indexer(self, key: slice, kind: str): From 34519f11db638267848bc56c76085da1335fbc61 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 23 May 2021 06:51:42 +0100 Subject: [PATCH 3/7] assert dtype --- pandas/_testing/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 0a904839afbc1..ba75f9e1c95fb 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -298,6 +298,7 @@ def makeBoolIndex(k=10, name=None): def makeNumericIndex(k=10, name=None, *, dtype): dtype = pandas_dtype(dtype) + assert isinstance(dtype, np.dtype) if dtype.kind == "i": values = list(range(k)) @@ -314,7 +315,7 @@ def makeNumericIndex(k=10, name=None, *, dtype): def makeIntIndex(k=10, name=None): - base_idx = makeNumericIndex(k, dtype="int64") + base_idx = makeNumericIndex(k, name=name, dtype="int64") return Int64Index(base_idx) From ce35d545e2c0c821b3d4aa9a5c6c84564192823e Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 23 May 2021 11:13:00 +0100 Subject: [PATCH 4/7] NumericIndex._engine_type --- pandas/core/indexes/numeric.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 5e1badf2c490b..f3ddd84268bdb 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -106,20 +106,22 @@ def _can_hold_na(self) -> bool: else: return False + _engine_types: dict[np.dtype : libindex.IndexEngine] = { + np.dtype(np.int8): libindex.Int8Engine, + np.dtype(np.int16): libindex.Int16Engine, + np.dtype(np.int32): libindex.Int32Engine, + np.dtype(np.int64): libindex.Int64Engine, + np.dtype(np.uint8): libindex.UInt8Engine, + np.dtype(np.uint16): libindex.UInt16Engine, + np.dtype(np.uint32): libindex.UInt32Engine, + np.dtype(np.uint64): libindex.UInt64Engine, + np.dtype(np.float32): libindex.Float32Engine, + np.dtype(np.float64): libindex.Float64Engine, + } + @property def _engine_type(self): - return { - np.int8: libindex.Int8Engine, - np.int16: libindex.Int16Engine, - np.int32: libindex.Int32Engine, - np.int64: libindex.Int64Engine, - np.uint8: libindex.UInt8Engine, - np.uint16: libindex.UInt16Engine, - np.uint32: libindex.UInt32Engine, - np.uint64: libindex.UInt64Engine, - np.float32: libindex.Float32Engine, - np.float64: libindex.Float64Engine, - }[self.dtype.type] + return self._engine_types[self.dtype] @cache_readonly def inferred_type(self) -> str: From 8583961d40ce292f195fc80f3e9ac1ee06566511 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 23 May 2021 14:32:41 +0100 Subject: [PATCH 5/7] typing bug --- pandas/core/indexes/numeric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index f3ddd84268bdb..bce1a7cf158b7 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -106,7 +106,7 @@ def _can_hold_na(self) -> bool: else: return False - _engine_types: dict[np.dtype : libindex.IndexEngine] = { + _engine_types: dict[np.dtype, libindex.IndexEngine] = { np.dtype(np.int8): libindex.Int8Engine, np.dtype(np.int16): libindex.Int16Engine, np.dtype(np.int32): libindex.Int32Engine, @@ -120,7 +120,7 @@ def _can_hold_na(self) -> bool: } @property - def _engine_type(self): + def _engine_type(self) -> libindex.IndexEngine: return self._engine_types[self.dtype] @cache_readonly From ce82b422fcfacd960d060612ad61c42426607b12 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 23 May 2021 16:28:22 +0100 Subject: [PATCH 6/7] typing issues --- pandas/core/indexes/numeric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index bce1a7cf158b7..ea2d5d9eec6ac 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -106,7 +106,7 @@ def _can_hold_na(self) -> bool: else: return False - _engine_types: dict[np.dtype, libindex.IndexEngine] = { + _engine_types: dict[np.dtype, type[libindex.IndexEngine]] = { np.dtype(np.int8): libindex.Int8Engine, np.dtype(np.int16): libindex.Int16Engine, np.dtype(np.int32): libindex.Int32Engine, @@ -120,7 +120,7 @@ def _can_hold_na(self) -> bool: } @property - def _engine_type(self) -> libindex.IndexEngine: + def _engine_type(self): return self._engine_types[self.dtype] @cache_readonly From 02f54b2439bb94c533565d9ddd310c8e5a38877e Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 26 May 2021 17:56:00 +0100 Subject: [PATCH 7/7] change according to comments --- pandas/_testing/__init__.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index ba75f9e1c95fb..40f23c25a1e99 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -30,9 +30,12 @@ from pandas.core.dtypes.common import ( is_datetime64_dtype, is_datetime64tz_dtype, + is_float_dtype, + is_integer_dtype, is_period_dtype, is_sequence, is_timedelta64_dtype, + is_unsigned_integer_dtype, pandas_dtype, ) @@ -300,13 +303,13 @@ def makeNumericIndex(k=10, name=None, *, dtype): dtype = pandas_dtype(dtype) assert isinstance(dtype, np.dtype) - if dtype.kind == "i": - values = list(range(k)) - elif dtype.kind == "u": - start_num = 2 ** (dtype.itemsize * 8 - 1) - values = [start_num + i for i in range(k)] - elif dtype.kind == "f": - values = sorted(np.random.random_sample(k)) - np.random.random_sample(1) + if is_integer_dtype(dtype): + values = np.arange(k, dtype=dtype) + if is_unsigned_integer_dtype(dtype): + values += 2 ** (dtype.itemsize * 8 - 1) + elif is_float_dtype(dtype): + values = np.random.random_sample(k) - np.random.random_sample(1) + values.sort() values = values * (10 ** np.random.randint(0, 9)) else: raise NotImplementedError(f"wrong dtype {dtype}")