-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
CLN: add typing for dtype arg in core/arrays (GH38808) #38886
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7dccc3e
d41b2f2
349ad51
e7903b1
a856790
9d0df0a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
import operator | ||
from operator import le, lt | ||
import textwrap | ||
from typing import Sequence, Type, TypeVar | ||
from typing import Optional, Sequence, Type, TypeVar, cast | ||
|
||
import numpy as np | ||
|
||
|
@@ -14,7 +14,7 @@ | |
intervals_to_interval_bounds, | ||
) | ||
from pandas._libs.missing import NA | ||
from pandas._typing import ArrayLike | ||
from pandas._typing import ArrayLike, Dtype, NpDtype | ||
from pandas.compat.numpy import function as nv | ||
from pandas.util._decorators import Appender | ||
|
||
|
@@ -170,7 +170,7 @@ def __new__( | |
cls, | ||
data, | ||
closed=None, | ||
dtype=None, | ||
dtype: Optional[Dtype] = None, | ||
copy: bool = False, | ||
verify_integrity: bool = True, | ||
): | ||
|
@@ -212,7 +212,13 @@ def __new__( | |
|
||
@classmethod | ||
def _simple_new( | ||
cls, left, right, closed=None, copy=False, dtype=None, verify_integrity=True | ||
cls, | ||
left, | ||
right, | ||
closed=None, | ||
copy=False, | ||
dtype: Optional[Dtype] = None, | ||
verify_integrity=True, | ||
): | ||
result = IntervalMixin.__new__(cls) | ||
|
||
|
@@ -223,12 +229,14 @@ def _simple_new( | |
if dtype is not None: | ||
# GH 19262: dtype must be an IntervalDtype to override inferred | ||
dtype = pandas_dtype(dtype) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if not is_interval_dtype(dtype): | ||
if is_interval_dtype(dtype): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could just do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In #38680, we discussed the usage of isinstance vs is_* methods in combination with cast and the latter was preferred. Therefore I continued with it here. |
||
dtype = cast(IntervalDtype, dtype) | ||
if dtype.subtype is not None: | ||
left = left.astype(dtype.subtype) | ||
right = right.astype(dtype.subtype) | ||
else: | ||
Comment on lines
+232
to
+237
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why change the logic here? If the rest of the PR just adds type hints, maybe it would be safer to keep refactorings to a separate PR? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I changed the code because otherwise the Mypy checks will not pass. But do note the logic itself did not change, it is just the order of operations There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is fine (you just moved and changed the logical test), but puzzled why we don't need the clause on L229 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cc @jbrockmendel if any idea here |
||
msg = f"dtype must be an IntervalDtype, got {dtype}" | ||
raise TypeError(msg) | ||
elif dtype.subtype is not None: | ||
left = left.astype(dtype.subtype) | ||
right = right.astype(dtype.subtype) | ||
|
||
# coerce dtypes to match if needed | ||
if is_float_dtype(left) and is_integer_dtype(right): | ||
|
@@ -279,7 +287,9 @@ def _simple_new( | |
return result | ||
|
||
@classmethod | ||
def _from_sequence(cls, scalars, *, dtype=None, copy=False): | ||
def _from_sequence( | ||
cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False | ||
): | ||
return cls(scalars, dtype=dtype, copy=copy) | ||
|
||
@classmethod | ||
|
@@ -338,7 +348,9 @@ def _from_factorized(cls, values, original): | |
), | ||
} | ||
) | ||
def from_breaks(cls, breaks, closed="right", copy=False, dtype=None): | ||
def from_breaks( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. copy: bool |
||
cls, breaks, closed="right", copy: bool = False, dtype: Optional[Dtype] = None | ||
): | ||
breaks = maybe_convert_platform_interval(breaks) | ||
|
||
return cls.from_arrays(breaks[:-1], breaks[1:], closed, copy=copy, dtype=dtype) | ||
|
@@ -407,7 +419,9 @@ def from_breaks(cls, breaks, closed="right", copy=False, dtype=None): | |
), | ||
} | ||
) | ||
def from_arrays(cls, left, right, closed="right", copy=False, dtype=None): | ||
def from_arrays( | ||
cls, left, right, closed="right", copy=False, dtype: Optional[Dtype] = None | ||
): | ||
left = maybe_convert_platform_interval(left) | ||
right = maybe_convert_platform_interval(right) | ||
|
||
|
@@ -464,7 +478,9 @@ def from_arrays(cls, left, right, closed="right", copy=False, dtype=None): | |
), | ||
} | ||
) | ||
def from_tuples(cls, data, closed="right", copy=False, dtype=None): | ||
def from_tuples( | ||
cls, data, closed="right", copy=False, dtype: Optional[Dtype] = None | ||
): | ||
if len(data): | ||
left, right = [], [] | ||
else: | ||
|
@@ -1277,7 +1293,7 @@ def is_non_overlapping_monotonic(self): | |
# --------------------------------------------------------------------- | ||
# Conversion | ||
|
||
def __array__(self, dtype=None) -> np.ndarray: | ||
def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray: | ||
""" | ||
Return the IntervalArray's data as a numpy array of Interval | ||
objects (with dtype='object') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
by the time we get here we should have a DtypeObj shouldnt we?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
_simple_new
method is a.o. used in the__new__
method on L169 of this file. The__new__
method accepts the dtype values of typeDtype
and passes it without modification to the dtype arg of_simple_new
. Therefore I also gave it type Dtype here.