Skip to content

CLN: various related to numeric indexes #41615

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 31 additions & 4 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,13 @@
from pandas.core.dtypes.common import (
is_datetime64_dtype,
is_datetime64tz_dtype,
is_float_dtype,
is_integer_dtype,
is_period_dtype,
is_sequence,
is_timedelta64_dtype,
is_unsigned_integer_dtype,
pandas_dtype,
)

import pandas as pd
Expand All @@ -41,11 +45,14 @@
CategoricalIndex,
DataFrame,
DatetimeIndex,
Float64Index,
Index,
Int64Index,
IntervalIndex,
MultiIndex,
RangeIndex,
Series,
UInt64Index,
bdate_range,
)
from pandas._testing._io import ( # noqa:F401
Expand Down Expand Up @@ -292,21 +299,41 @@ def makeBoolIndex(k=10, name=None):
return Index([False, True] + [False] * (k - 2), name=name)


def makeNumericIndex(k=10, name=None, *, dtype):
dtype = pandas_dtype(dtype)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we expect non-numpy dtypes here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’ve added an assert for that here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my idea was to just use dtype = np.dtype(dtype)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

>>> np.dtype("Int8")  # capital Int
DeprecationWarning: Numeric-style type codes are deprecated and will result in an error in the future.
  np.dtype("Int8")
dtype('int8')

So, IMO, we should hold off on switching to np.dtype(dtype) until the call above actually raises an error.

Copy link
Contributor

@jreback jreback May 27, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will not ever raise as these are pandas dtypes

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel raises a valid point: if you are expecting only numpy dtypes, then why are pandas dtypes being passed?

Copy link
Contributor Author

@topper-123 topper-123 May 27, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will not ever raise as these are pandas dtypes

assert isinstance(dtype, np.dtype) on line 304 ensures that it will raise if it is a pandas dtype.

if you are expecting only numpy dtypes, then why are pandas dtypes being passed?

I'm not intending to pass pandas dtypes here, but I'm using pandas_dtype to avoid an ambiguity: the string "Int64" in pandas means Int64Dtype, but in numpy it means np.int64. So by calling dtype = pandas_dtype(dtype) and then asserting isinstance(dtype, np.dtype), we ensure we end up with the correct (numpy) dtype.

assert isinstance(dtype, np.dtype)

if is_integer_dtype(dtype):
values = np.arange(k, dtype=dtype)
if is_unsigned_integer_dtype(dtype):
values += 2 ** (dtype.itemsize * 8 - 1)
elif is_float_dtype(dtype):
values = np.random.random_sample(k) - np.random.random_sample(1)
values.sort()
values = values * (10 ** np.random.randint(0, 9))
else:
raise NotImplementedError(f"wrong dtype {dtype}")

return Index(values, dtype=dtype, name=name)
Copy link
Contributor Author

@topper-123 topper-123 May 22, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once this is merged, #41153 will only need to change this line to return NumericIndex(values, dtype=dtype, name=name).



def makeIntIndex(k=10, name=None):
return Index(list(range(k)), name=name)
base_idx = makeNumericIndex(k, name=name, dtype="int64")
return Int64Index(base_idx)


def makeUIntIndex(k=10, name=None):
return Index([2 ** 63 + i for i in range(k)], name=name)
base_idx = makeNumericIndex(k, name=name, dtype="uint64")
return UInt64Index(base_idx)


def makeRangeIndex(k=10, name=None, **kwargs):
return RangeIndex(0, k, 1, name=name, **kwargs)


def makeFloatIndex(k=10, name=None):
values = sorted(np.random.random_sample(k)) - np.random.random_sample(1)
return Index(values * (10 ** np.random.randint(0, 9)), name=name)
base_idx = makeNumericIndex(k, name=name, dtype="float64")
return Float64Index(base_idx)


def makeDateIndex(k: int = 10, freq="B", name=None, **kwargs) -> DatetimeIndex:
Expand Down
2 changes: 1 addition & 1 deletion pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def assert_index_equal(
"""
__tracebackhide__ = True

def _check_types(left, right, obj="Index"):
def _check_types(left, right, obj="Index") -> None:
if not exact:
return

Expand Down
28 changes: 15 additions & 13 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,20 +106,22 @@ def _can_hold_na(self) -> bool:
else:
return False

@cache_readonly
_engine_types: dict[np.dtype, type[libindex.IndexEngine]] = {
np.dtype(np.int8): libindex.Int8Engine,
np.dtype(np.int16): libindex.Int16Engine,
np.dtype(np.int32): libindex.Int32Engine,
np.dtype(np.int64): libindex.Int64Engine,
np.dtype(np.uint8): libindex.UInt8Engine,
np.dtype(np.uint16): libindex.UInt16Engine,
np.dtype(np.uint32): libindex.UInt32Engine,
np.dtype(np.uint64): libindex.UInt64Engine,
np.dtype(np.float32): libindex.Float32Engine,
np.dtype(np.float64): libindex.Float64Engine,
}

@property
def _engine_type(self):
return {
np.int8: libindex.Int8Engine,
np.int16: libindex.Int16Engine,
np.int32: libindex.Int32Engine,
np.int64: libindex.Int64Engine,
np.uint8: libindex.UInt8Engine,
np.uint16: libindex.UInt16Engine,
np.uint32: libindex.UInt32Engine,
np.uint64: libindex.UInt64Engine,
np.float32: libindex.Float32Engine,
np.float64: libindex.Float64Engine,
}[self.dtype.type]
return self._engine_types[self.dtype]

@cache_readonly
def inferred_type(self) -> str:
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/api/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def test_api(self):
+ self.funcs_to
+ self.private_modules
)
self.check(pd, checkthese, self.ignored)
self.check(namespace=pd, expected=checkthese, ignored=self.ignored)

def test_depr(self):
deprecated_list = (
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/base/test_unique.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ def test_unique(index_or_series_obj):
if isinstance(obj, pd.MultiIndex):
expected = pd.MultiIndex.from_tuples(unique_values)
expected.names = obj.names
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected, exact=True)
elif isinstance(obj, pd.Index):
expected = pd.Index(unique_values, dtype=obj.dtype)
if is_datetime64tz_dtype(obj.dtype):
expected = expected.normalize()
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected, exact=True)
else:
expected = np.array(unique_values)
tm.assert_numpy_array_equal(result, expected)
Expand Down Expand Up @@ -67,7 +67,7 @@ def test_unique_null(null_obj, index_or_series_obj):
if is_datetime64tz_dtype(obj.dtype):
result = result.normalize()
expected = expected.normalize()
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected, exact=True)
else:
expected = np.array(unique_values, dtype=obj.dtype)
tm.assert_numpy_array_equal(result, expected)
Expand Down Expand Up @@ -118,7 +118,7 @@ def test_unique_bad_unicode(idx_or_series_w_bad_unicode):

if isinstance(obj, pd.Index):
expected = pd.Index(["\ud83d"], dtype=object)
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected, exact=True)
else:
expected = np.array(["\ud83d"], dtype=object)
tm.assert_numpy_array_equal(result, expected)
Expand Down