Skip to content

CLN: add typing for dtype arg in core/arrays (GH38808) #38886

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 29 additions & 13 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import operator
from operator import le, lt
import textwrap
from typing import Sequence, Type, TypeVar
from typing import Optional, Sequence, Type, TypeVar, cast

import numpy as np

Expand All @@ -14,7 +14,7 @@
intervals_to_interval_bounds,
)
from pandas._libs.missing import NA
from pandas._typing import ArrayLike
from pandas._typing import ArrayLike, Dtype, NpDtype
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender

Expand Down Expand Up @@ -170,7 +170,7 @@ def __new__(
cls,
data,
closed=None,
dtype=None,
dtype: Optional[Dtype] = None,
copy: bool = False,
verify_integrity: bool = True,
):
Expand Down Expand Up @@ -212,7 +212,13 @@ def __new__(

@classmethod
def _simple_new(
cls, left, right, closed=None, copy=False, dtype=None, verify_integrity=True
cls,
left,
right,
closed=None,
copy=False,
dtype: Optional[Dtype] = None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

by the time we get here we should have a DtypeObj, shouldn't we?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _simple_new method is used, among other places, in the __new__ method on L169 of this file. The __new__ method accepts a dtype value of type Dtype and passes it without modification to the dtype arg of _simple_new. Therefore I also gave it type Dtype here.

verify_integrity=True,
):
result = IntervalMixin.__new__(cls)

Expand All @@ -223,12 +229,14 @@ def _simple_new(
if dtype is not None:
# GH 19262: dtype must be an IntervalDtype to override inferred
dtype = pandas_dtype(dtype)
if not is_interval_dtype(dtype):
if is_interval_dtype(dtype):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could just do isinstance(dtype, IntervalDtype) and avoid cast

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In #38680, we discussed the usage of isinstance vs is_* methods in combination with cast and the latter was preferred. Therefore I continued with it here.

dtype = cast(IntervalDtype, dtype)
if dtype.subtype is not None:
left = left.astype(dtype.subtype)
right = right.astype(dtype.subtype)
else:
Comment on lines +232 to +237
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why change the logic here? If the rest of the PR just adds type hints, maybe it would be safer to keep refactorings to a separate PR?

Copy link
Contributor Author

@avinashpancham avinashpancham Jan 3, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed the code because otherwise the Mypy checks will not pass. But do note that the logic itself did not change; only the order of operations did.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is fine (you just moved and changed the logical test), but I'm puzzled why we don't need the clause on L229

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @jbrockmendel if any idea here

msg = f"dtype must be an IntervalDtype, got {dtype}"
raise TypeError(msg)
elif dtype.subtype is not None:
left = left.astype(dtype.subtype)
right = right.astype(dtype.subtype)

# coerce dtypes to match if needed
if is_float_dtype(left) and is_integer_dtype(right):
Expand Down Expand Up @@ -279,7 +287,9 @@ def _simple_new(
return result

@classmethod
def _from_sequence(cls, scalars, *, dtype=None, copy=False):
def _from_sequence(
cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False
):
return cls(scalars, dtype=dtype, copy=copy)

@classmethod
Expand Down Expand Up @@ -338,7 +348,9 @@ def _from_factorized(cls, values, original):
),
}
)
def from_breaks(cls, breaks, closed="right", copy=False, dtype=None):
def from_breaks(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

copy: bool

cls, breaks, closed="right", copy: bool = False, dtype: Optional[Dtype] = None
):
breaks = maybe_convert_platform_interval(breaks)

return cls.from_arrays(breaks[:-1], breaks[1:], closed, copy=copy, dtype=dtype)
Expand Down Expand Up @@ -407,7 +419,9 @@ def from_breaks(cls, breaks, closed="right", copy=False, dtype=None):
),
}
)
def from_arrays(cls, left, right, closed="right", copy=False, dtype=None):
def from_arrays(
cls, left, right, closed="right", copy=False, dtype: Optional[Dtype] = None
):
left = maybe_convert_platform_interval(left)
right = maybe_convert_platform_interval(right)

Expand Down Expand Up @@ -464,7 +478,9 @@ def from_arrays(cls, left, right, closed="right", copy=False, dtype=None):
),
}
)
def from_tuples(cls, data, closed="right", copy=False, dtype=None):
def from_tuples(
cls, data, closed="right", copy=False, dtype: Optional[Dtype] = None
):
if len(data):
left, right = [], []
else:
Expand Down Expand Up @@ -1277,7 +1293,7 @@ def is_non_overlapping_monotonic(self):
# ---------------------------------------------------------------------
# Conversion

def __array__(self, dtype=None) -> np.ndarray:
def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray:
"""
Return the IntervalArray's data as a numpy array of Interval
objects (with dtype='object')
Expand Down
9 changes: 6 additions & 3 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import numpy as np

from pandas._libs import lib, missing as libmissing
from pandas._typing import ArrayLike, Dtype, Scalar
from pandas._typing import ArrayLike, Dtype, NpDtype, Scalar
from pandas.errors import AbstractMethodError
from pandas.util._decorators import cache_readonly, doc

Expand Down Expand Up @@ -147,7 +147,10 @@ def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
return type(self)(~self._data, self._mask)

def to_numpy(
self, dtype=None, copy: bool = False, na_value: Scalar = lib.no_default
self,
dtype: Optional[NpDtype] = None,
copy: bool = False,
na_value: Scalar = lib.no_default,
) -> np.ndarray:
"""
Convert to a NumPy Array.
Expand Down Expand Up @@ -257,7 +260,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:

__array_priority__ = 1000 # higher than ndarray so ops dispatch to us

def __array__(self, dtype=None) -> np.ndarray:
def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray:
"""
the array interface, return my values
We return an object array here to preserve our scalar values
Expand Down
72 changes: 60 additions & 12 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import numbers
from typing import Tuple, Type, Union
from typing import Optional, Tuple, Type, Union

import numpy as np
from numpy.lib.mixins import NDArrayOperatorsMixin

from pandas._libs import lib
from pandas._typing import Scalar
from pandas._typing import Dtype, NpDtype, Scalar
from pandas.compat.numpy import function as nv

from pandas.core.dtypes.dtypes import ExtensionDtype
Expand Down Expand Up @@ -38,7 +38,7 @@ class PandasDtype(ExtensionDtype):

_metadata = ("_dtype",)

def __init__(self, dtype: object):
def __init__(self, dtype: Optional[NpDtype]):
self._dtype = np.dtype(dtype)

def __repr__(self) -> str:
Expand Down Expand Up @@ -173,7 +173,7 @@ def __init__(self, values: Union[np.ndarray, "PandasArray"], copy: bool = False)

@classmethod
def _from_sequence(
cls, scalars, *, dtype=None, copy: bool = False
cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False
) -> "PandasArray":
if isinstance(dtype, PandasDtype):
dtype = dtype._dtype
Expand All @@ -200,7 +200,7 @@ def dtype(self) -> PandasDtype:
# ------------------------------------------------------------------------
# NumPy Array Interface

def __array__(self, dtype=None) -> np.ndarray:
def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray:
return np.asarray(self._ndarray, dtype=dtype)

_HANDLED_TYPES = (np.ndarray, numbers.Number)
Expand Down Expand Up @@ -311,7 +311,15 @@ def prod(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar:
)
return self._wrap_reduction_result(axis, result)

def mean(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
def mean(
self,
*,
axis=None,
dtype: Optional[NpDtype] = None,
out=None,
keepdims=False,
skipna=True,
):
nv.validate_mean((), {"dtype": dtype, "out": out, "keepdims": keepdims})
result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna)
return self._wrap_reduction_result(axis, result)
Expand All @@ -326,7 +334,14 @@ def median(
return self._wrap_reduction_result(axis, result)

def std(
self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True
self,
*,
axis=None,
dtype: Optional[NpDtype] = None,
out=None,
ddof=1,
keepdims=False,
skipna=True,
):
nv.validate_stat_ddof_func(
(), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std"
Expand All @@ -335,7 +350,14 @@ def std(
return self._wrap_reduction_result(axis, result)

def var(
self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True
self,
*,
axis=None,
dtype: Optional[NpDtype] = None,
out=None,
ddof=1,
keepdims=False,
skipna=True,
):
nv.validate_stat_ddof_func(
(), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="var"
Expand All @@ -344,22 +366,45 @@ def var(
return self._wrap_reduction_result(axis, result)

def sem(
self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True
self,
*,
axis=None,
dtype: Optional[NpDtype] = None,
out=None,
ddof=1,
keepdims=False,
skipna=True,
):
nv.validate_stat_ddof_func(
(), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="sem"
)
result = nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
return self._wrap_reduction_result(axis, result)

def kurt(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
def kurt(
self,
*,
axis=None,
dtype: Optional[NpDtype] = None,
out=None,
keepdims=False,
skipna=True,
):
nv.validate_stat_ddof_func(
(), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="kurt"
)
result = nanops.nankurt(self._ndarray, axis=axis, skipna=skipna)
return self._wrap_reduction_result(axis, result)

def skew(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
def skew(
self,
*,
axis=None,
dtype: Optional[NpDtype] = None,
out=None,
keepdims=False,
skipna=True,
):
nv.validate_stat_ddof_func(
(), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="skew"
)
Expand All @@ -370,7 +415,10 @@ def skew(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
# Additional Methods

def to_numpy(
self, dtype=None, copy: bool = False, na_value=lib.no_default
self,
dtype: Optional[NpDtype] = None,
copy: bool = False,
na_value=lib.no_default,
) -> np.ndarray:
result = np.asarray(self._ndarray, dtype=dtype)

Expand Down
13 changes: 8 additions & 5 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
get_period_field_arr,
period_asfreq_arr,
)
from pandas._typing import AnyArrayLike, Dtype
from pandas._typing import AnyArrayLike, Dtype, NpDtype
from pandas.util._decorators import cache_readonly, doc

from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -159,7 +159,7 @@ class PeriodArray(PeriodMixin, dtl.DatelikeOps):
# --------------------------------------------------------------------
# Constructors

def __init__(self, values, dtype=None, freq=None, copy=False):
def __init__(self, values, dtype: Optional[Dtype] = None, freq=None, copy=False):
freq = validate_dtype_freq(dtype, freq)

if freq is not None:
Expand All @@ -186,7 +186,10 @@ def __init__(self, values, dtype=None, freq=None, copy=False):

@classmethod
def _simple_new(
cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=None
cls,
values: np.ndarray,
freq: Optional[BaseOffset] = None,
dtype: Optional[Dtype] = None,
) -> "PeriodArray":
# alias for PeriodArray.__init__
assertion_msg = "Should be numpy array of type i8"
Expand Down Expand Up @@ -220,7 +223,7 @@ def _from_sequence(

@classmethod
def _from_sequence_of_strings(
cls, strings, *, dtype=None, copy=False
cls, strings, *, dtype: Optional[Dtype] = None, copy=False
) -> "PeriodArray":
return cls._from_sequence(strings, dtype=dtype, copy=copy)

Expand Down Expand Up @@ -301,7 +304,7 @@ def freq(self) -> BaseOffset:
"""
return self.dtype.freq

def __array__(self, dtype=None) -> np.ndarray:
def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray:
if dtype == "i8":
return self.asi8
elif dtype == bool:
Expand Down
Loading