-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
Removed ABCs from pandas._typing #27424
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
9c6e616
6490263
cce9afb
1fe6059
5f2fa6a
72d3e2c
c3bd6df
5a8d35a
5889715
ed2ee7f
095aed4
4bf254c
8f4a7a1
5e77b75
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -1,34 +1,37 @@ | ||||||
from pathlib import Path | ||||||
from typing import IO, AnyStr, TypeVar, Union | ||||||
from typing import IO, TYPE_CHECKING, AnyStr, TypeVar, Union | ||||||
|
||||||
import numpy as np | ||||||
|
||||||
from pandas._libs import Timestamp | ||||||
from pandas._libs.tslibs.period import Period | ||||||
from pandas._libs.tslibs.timedeltas import Timedelta | ||||||
# To prevent import cycles place any internal imports in the branch below | ||||||
# and use a string literal forward reference to it in subsequent types | ||||||
# https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles | ||||||
if TYPE_CHECKING: | ||||||
from pandas._libs import Period, Timedelta, Timestamp | ||||||
|
||||||
from pandas.core.arrays.base import ExtensionArray | ||||||
from pandas.core.dtypes.dtypes import ExtensionDtype | ||||||
from pandas.core.dtypes.generic import ( | ||||||
ABCDataFrame, | ||||||
ABCExtensionArray, | ||||||
ABCIndexClass, | ||||||
ABCSeries, | ||||||
ABCSparseSeries, | ||||||
WillAyd marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
) | ||||||
from pandas.core.indexes.base import Index | ||||||
from pandas.core.frame import DataFrame | ||||||
from pandas.core.series import Series | ||||||
from pandas.core.sparse.series import SparseSeries | ||||||
|
||||||
from pandas.core.dtypes.dtypes import ExtensionDtype | ||||||
from pandas.core.dtypes.generic import ( | ||||||
ABCDataFrame, | ||||||
ABCExtensionArray, | ||||||
ABCIndexClass, | ||||||
ABCSeries, | ||||||
ABCSparseSeries, | ||||||
) | ||||||
|
||||||
AnyArrayLike = TypeVar( | ||||||
"AnyArrayLike", | ||||||
ABCExtensionArray, | ||||||
ABCIndexClass, | ||||||
ABCSeries, | ||||||
ABCSparseSeries, | ||||||
np.ndarray, | ||||||
"AnyArrayLike", "ExtensionArray", "Index", "Series", "SparseSeries", np.ndarray | ||||||
) | ||||||
ArrayLike = TypeVar("ArrayLike", ABCExtensionArray, np.ndarray) | ||||||
DatetimeLikeScalar = TypeVar("DatetimeLikeScalar", Period, Timestamp, Timedelta) | ||||||
Dtype = Union[str, np.dtype, ExtensionDtype] | ||||||
ArrayLike = TypeVar("ArrayLike", "ExtensionArray", np.ndarray) | ||||||
DatetimeLikeScalar = TypeVar("DatetimeLikeScalar", "Period", "Timestamp", "Timedelta") | ||||||
Dtype = Union[str, np.dtype, "ExtensionDtype"] | ||||||
FilePathOrBuffer = Union[str, Path, IO[AnyStr]] | ||||||
|
||||||
FrameOrSeries = TypeVar("FrameOrSeries", ABCSeries, ABCDataFrame) | ||||||
FrameOrSeries = TypeVar("FrameOrSeries", "Series", "DataFrame") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
quote from https://mypy.readthedocs.io/en/latest/generics.html... There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks but this just loosens the type system rather than actually fixing anything. TypeVar is going to be generally more useful for checking functions that can be fully generic in nature. Might just change the return of this one and see how many others require Union in the future There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Makes sense. Union[Series, DataFrame] might be better written as NDFrame anyway? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also the "user-defined generics" you are referring to are more applicable to containers, not TypeVars. Right now we just use a blanket The TypeVar in the docs you linked is just a way of parametrizing that user-defined generic, so that a We are probably a ways off of doing user-defined generics but it is great that you looked into it. Certainly open to ideas on that front if you think of a good way to implement as we get more familiar with these annotations There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Hmm that would work though we don't typically import NDFrame anywhere so I don't think we want to start here There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would leave it as FrameOrSeries as it's more descriptive |
||||||
Scalar = Union[str, int, float] | ||||||
Axis = Union[str, int] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -906,35 +906,35 @@ def get_indexer( | |
) | ||
raise InvalidIndexError(msg) | ||
|
||
target = ensure_index(target) | ||
target_as_index = ensure_index(target) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if you also make There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same thing though - this just loosens the type checking which isn't desired. Actually moved towards TypeVars for reasons described in #26453 (comment) Might update #27050 to include some of that info There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I don't think that applies here.
so reassigning with an There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We could add Union alternatives for each TypeVar in the central module but I think that confounds the point of generic programming and/or makes our type system weaker. Another option would be to allow redefinition of variables which mypy supplies a setting for: But I also think that makes for a weaker type system, and generally there's not a lot of downside to creating a separate variable here instead of allowing its type to implicitly be altered by the return of There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Definitely. We should rule this out.
Disagree. The return type of `ensure_index` is creating new variables not weakening the type system? |
||
|
||
if isinstance(target, IntervalIndex): | ||
if isinstance(target_as_index, IntervalIndex): | ||
# equal indexes -> 1:1 positional match | ||
if self.equals(target): | ||
if self.equals(target_as_index): | ||
return np.arange(len(self), dtype="intp") | ||
|
||
# different closed or incompatible subtype -> no matches | ||
common_subtype = find_common_type( | ||
[self.dtype.subtype, target.dtype.subtype] | ||
[self.dtype.subtype, target_as_index.dtype.subtype] | ||
) | ||
if self.closed != target.closed or is_object_dtype(common_subtype): | ||
return np.repeat(np.intp(-1), len(target)) | ||
if self.closed != target_as_index.closed or is_object_dtype(common_subtype): | ||
return np.repeat(np.intp(-1), len(target_as_index)) | ||
|
||
# non-overlapping -> at most one match per interval in target | ||
# non-overlapping -> at most one match per interval in target_as_index | ||
# want exact matches -> need both left/right to match, so defer to | ||
# left/right get_indexer, compare elementwise, equality -> match | ||
left_indexer = self.left.get_indexer(target.left) | ||
right_indexer = self.right.get_indexer(target.right) | ||
left_indexer = self.left.get_indexer(target_as_index.left) | ||
right_indexer = self.right.get_indexer(target_as_index.right) | ||
indexer = np.where(left_indexer == right_indexer, left_indexer, -1) | ||
elif not is_object_dtype(target): | ||
elif not is_object_dtype(target_as_index): | ||
# homogeneous scalar index: use IntervalTree | ||
target = self._maybe_convert_i8(target) | ||
indexer = self._engine.get_indexer(target.values) | ||
target_as_index = self._maybe_convert_i8(target_as_index) | ||
indexer = self._engine.get_indexer(target_as_index.values) | ||
else: | ||
# heterogeneous scalar index: defer elementwise to get_loc | ||
# (non-overlapping so get_loc guarantees scalar of KeyError) | ||
indexer = [] | ||
for key in target: | ||
for key in target_as_index: | ||
try: | ||
loc = self.get_loc(key) | ||
except KeyError: | ||
|
@@ -947,21 +947,26 @@ def get_indexer( | |
def get_indexer_non_unique( | ||
self, target: AnyArrayLike | ||
) -> Tuple[np.ndarray, np.ndarray]: | ||
target = ensure_index(target) | ||
target_as_index = ensure_index(target) | ||
|
||
# check that target IntervalIndex is compatible | ||
if isinstance(target, IntervalIndex): | ||
# check that target_as_index IntervalIndex is compatible | ||
if isinstance(target_as_index, IntervalIndex): | ||
common_subtype = find_common_type( | ||
[self.dtype.subtype, target.dtype.subtype] | ||
[self.dtype.subtype, target_as_index.dtype.subtype] | ||
) | ||
if self.closed != target.closed or is_object_dtype(common_subtype): | ||
if self.closed != target_as_index.closed or is_object_dtype(common_subtype): | ||
# different closed or incompatible subtype -> no matches | ||
return np.repeat(-1, len(target)), np.arange(len(target)) | ||
return ( | ||
np.repeat(-1, len(target_as_index)), | ||
np.arange(len(target_as_index)), | ||
) | ||
|
||
if is_object_dtype(target) or isinstance(target, IntervalIndex): | ||
# target might contain intervals: defer elementwise to get_loc | ||
if is_object_dtype(target_as_index) or isinstance( | ||
target_as_index, IntervalIndex | ||
): | ||
# target_as_index might contain intervals: defer elementwise to get_loc | ||
indexer, missing = [], [] | ||
for i, key in enumerate(target): | ||
for i, key in enumerate(target_as_index): | ||
try: | ||
locs = self.get_loc(key) | ||
if isinstance(locs, slice): | ||
|
@@ -973,8 +978,10 @@ def get_indexer_non_unique( | |
indexer.append(locs) | ||
indexer = np.concatenate(indexer) | ||
else: | ||
target = self._maybe_convert_i8(target) | ||
indexer, missing = self._engine.get_indexer_non_unique(target.values) | ||
target_as_index = self._maybe_convert_i8(target_as_index) | ||
indexer, missing = self._engine.get_indexer_non_unique( | ||
target_as_index.values | ||
) | ||
|
||
return ensure_platform_int(indexer), ensure_platform_int(missing) | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.