Skip to content

Commit b7708f0

Browse files
authored
CLN: assorted (#49850)
* CLN: assorted
* revert
* mypy fixup
* mypy fixup
* troubleshoot min_version build
1 parent 8f47982 commit b7708f0

File tree

21 files changed

+90
-202
lines changed

21 files changed

+90
-202
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -467,12 +467,12 @@ Removal of prior version deprecations/changes
467467
- Disallow passing non-keyword arguments to :meth:`DataFrame.where` and :meth:`Series.where` except for ``cond`` and ``other`` (:issue:`41523`)
468468
- Disallow passing non-keyword arguments to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` except for ``labels`` (:issue:`41491`)
469469
- Disallow passing non-keyword arguments to :meth:`Series.rename_axis` and :meth:`DataFrame.rename_axis` except for ``mapper`` (:issue:`47587`)
470-
- Disallow :meth:`Index.reindex` with non-unique :class:`Index` objects (:issue:`42568`)
471470
- Disallow passing non-keyword arguments to :meth:`Series.clip` and :meth:`DataFrame.clip` (:issue:`41511`)
472471
- Disallow passing non-keyword arguments to :meth:`Series.bfill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill` and :meth:`DataFrame.ffill` (:issue:`41508`)
473472
- Disallow passing non-keyword arguments to :meth:`DataFrame.replace`, :meth:`Series.replace` except for ``to_replace`` and ``value`` (:issue:`47587`)
474473
- Disallow passing non-keyword arguments to :meth:`DataFrame.sort_values` except for ``by`` (:issue:`41505`)
475474
- Disallow passing non-keyword arguments to :meth:`Series.sort_values` (:issue:`41505`)
475+
- Disallow :meth:`Index.reindex` with non-unique :class:`Index` objects (:issue:`42568`)
476476
- Disallowed constructing :class:`Categorical` with scalar ``data`` (:issue:`38433`)
477477
- Disallowed constructing :class:`CategoricalIndex` without passing ``data`` (:issue:`38944`)
478478
- Removed :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`)

pandas/core/arrays/datetimes.py

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ def _from_sequence_not_strict(
319319

320320
dtype = _validate_dt64_dtype(dtype)
321321
# if dtype has an embedded tz, capture it
322-
tz = validate_tz_from_dtype(dtype, tz, explicit_tz_none)
322+
tz = _validate_tz_from_dtype(dtype, tz, explicit_tz_none)
323323

324324
unit = None
325325
if dtype is not None:
@@ -338,7 +338,7 @@ def _from_sequence_not_strict(
338338
ambiguous=ambiguous,
339339
)
340340
# We have to call this again after possibly inferring a tz above
341-
validate_tz_from_dtype(dtype, tz, explicit_tz_none)
341+
_validate_tz_from_dtype(dtype, tz, explicit_tz_none)
342342
if tz is not None and explicit_tz_none:
343343
raise ValueError(
344344
"Passed data is timezone-aware, incompatible with 'tz=None'. "
@@ -1953,18 +1953,6 @@ def std(
19531953
# Constructor Helpers
19541954

19551955

1956-
def sequence_to_datetimes(data) -> DatetimeArray:
1957-
"""
1958-
Parse/convert the passed data to either DatetimeArray or np.ndarray[object].
1959-
"""
1960-
result, tz, freq = _sequence_to_dt64ns(data)
1961-
1962-
unit = np.datetime_data(result.dtype)[0]
1963-
dtype = tz_to_dtype(tz, unit)
1964-
dta = DatetimeArray._simple_new(result, freq=freq, dtype=dtype)
1965-
return dta
1966-
1967-
19681956
def _sequence_to_dt64ns(
19691957
data,
19701958
*,
@@ -2303,7 +2291,7 @@ def _validate_dt64_dtype(dtype):
23032291
23042292
Notes
23052293
-----
2306-
Unlike validate_tz_from_dtype, this does _not_ allow non-existent
2294+
Unlike _validate_tz_from_dtype, this does _not_ allow non-existent
23072295
tz errors to go through
23082296
"""
23092297
if dtype is not None:
@@ -2338,7 +2326,7 @@ def _validate_dt64_dtype(dtype):
23382326
return dtype
23392327

23402328

2341-
def validate_tz_from_dtype(
2329+
def _validate_tz_from_dtype(
23422330
dtype, tz: tzinfo | None, explicit_tz_none: bool = False
23432331
) -> tzinfo | None:
23442332
"""

pandas/core/arrays/sparse/array.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1884,11 +1884,7 @@ def make_sparse(
18841884
index = make_sparse_index(length, indices, kind)
18851885
sparsified_values = arr[mask]
18861886
if dtype is not None:
1887-
# error: Argument "dtype" to "astype_nansafe" has incompatible type "Union[str,
1888-
# dtype[Any]]"; expected "Union[dtype[Any], ExtensionDtype]"
1889-
sparsified_values = astype_nansafe(
1890-
sparsified_values, dtype=dtype # type: ignore[arg-type]
1891-
)
1887+
sparsified_values = astype_nansafe(sparsified_values, dtype=pandas_dtype(dtype))
18921888
# TODO: copy
18931889
return sparsified_values, index, fill_value
18941890

pandas/core/arrays/sparse/dtype.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,8 @@ def update_dtype(self, dtype) -> SparseDtype:
354354
if not isinstance(dtype, np.dtype):
355355
raise TypeError("sparse arrays of extension dtypes not supported")
356356

357-
fvarr = astype_nansafe(np.array(self.fill_value), dtype)
357+
fv_asarray = np.atleast_1d(np.array(self.fill_value))
358+
fvarr = astype_nansafe(fv_asarray, dtype)
358359
# NB: not fv_0d.item(), as that casts dt64->int
359360
fill_value = fvarr[0]
360361
dtype = cls(dtype, fill_value=fill_value)

pandas/core/construction.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
maybe_cast_to_integer_array,
4141
maybe_convert_platform,
4242
maybe_infer_to_datetimelike,
43-
maybe_upcast,
43+
maybe_promote,
4444
)
4545
from pandas.core.dtypes.common import (
4646
is_datetime64_ns_dtype,
@@ -484,7 +484,11 @@ def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray:
484484
"""
485485
mask = ma.getmaskarray(data)
486486
if mask.any():
487-
data, fill_value = maybe_upcast(data, copy=True)
487+
dtype, fill_value = maybe_promote(data.dtype, np.nan)
488+
dtype = cast(np.dtype, dtype)
489+
# Incompatible types in assignment (expression has type "ndarray[Any,
490+
# dtype[Any]]", variable has type "MaskedArray[Any, Any]")
491+
data = data.astype(dtype, copy=True) # type: ignore[assignment]
488492
data.soften_mask() # set hardmask False if it was True
489493
data[mask] = fill_value
490494
else:

pandas/core/dtypes/astype.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,6 @@ def astype_nansafe(
7878
The dtype was a datetime64/timedelta64 dtype, but it had no unit.
7979
"""
8080

81-
# We get here with 0-dim from sparse
82-
arr = np.atleast_1d(arr)
83-
8481
# dispatch on extension dtype if needed
8582
if isinstance(dtype, ExtensionDtype):
8683
return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy)

pandas/core/dtypes/cast.py

Lines changed: 8 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@
8484
)
8585
from pandas.core.dtypes.inference import is_list_like
8686
from pandas.core.dtypes.missing import (
87-
array_equivalent,
8887
is_valid_na_for_dtype,
8988
isna,
9089
na_value_for_dtype,
@@ -404,7 +403,7 @@ def trans(x):
404403
elif dtype.kind == result.dtype.kind == "c":
405404
new_result = result.astype(dtype)
406405

407-
if array_equivalent(new_result, result):
406+
if np.array_equal(new_result, result, equal_nan=True):
408407
# TODO: use tolerance like we do for float?
409408
return new_result
410409

@@ -543,10 +542,6 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan):
543542
ValueError
544543
If fill_value is a non-scalar and dtype is not object.
545544
"""
546-
# TODO(2.0): need to directly use the non-cached version as long as we
547-
# possibly raise a deprecation warning for datetime dtype
548-
if dtype.kind == "M":
549-
return _maybe_promote(dtype, fill_value)
550545
# for performance, we are using a cached version of the actual implementation
551546
# of the function in _maybe_promote. However, this doesn't always work (in case
552547
# of non-hashable arguments), so we fallback to the actual implementation if needed
@@ -923,40 +918,6 @@ def _maybe_infer_dtype_type(element):
923918
return tipo
924919

925920

926-
def maybe_upcast(
927-
values: NumpyArrayT,
928-
fill_value: Scalar = np.nan,
929-
copy: bool = False,
930-
) -> tuple[NumpyArrayT, Scalar]:
931-
"""
932-
Provide explicit type promotion and coercion.
933-
934-
Parameters
935-
----------
936-
values : np.ndarray
937-
The array that we may want to upcast.
938-
fill_value : what we want to fill with
939-
copy : bool, default True
940-
If True always make a copy even if no upcast is required.
941-
942-
Returns
943-
-------
944-
values: np.ndarray
945-
the original array, possibly upcast
946-
fill_value:
947-
the fill value, possibly upcast
948-
"""
949-
new_dtype, fill_value = maybe_promote(values.dtype, fill_value)
950-
# We get a copy in all cases _except_ (values.dtype == new_dtype and not copy)
951-
upcast_values = values.astype(new_dtype, copy=copy)
952-
953-
# error: Incompatible return value type (got "Tuple[ndarray[Any, dtype[Any]],
954-
# Union[Union[str, int, float, bool] Union[Period, Timestamp, Timedelta, Any]]]",
955-
# expected "Tuple[NumpyArrayT, Union[Union[str, int, float, bool], Union[Period,
956-
# Timestamp, Timedelta, Any]]]")
957-
return upcast_values, fill_value # type: ignore[return-value]
958-
959-
960921
def invalidate_string_dtypes(dtype_set: set[DtypeObj]) -> None:
961922
"""
962923
Change string like dtypes to object for
@@ -1227,20 +1188,16 @@ def maybe_cast_to_datetime(
12271188
if not is_list_like(value):
12281189
raise TypeError("value must be listlike")
12291190

1191+
# TODO: _from_sequence would raise ValueError in cases where
1192+
# _ensure_nanosecond_dtype raises TypeError
1193+
# Incompatible types in assignment (expression has type "Union[dtype[Any],
1194+
# ExtensionDtype]", variable has type "Optional[dtype[Any]]")
1195+
dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment]
1196+
12301197
if is_timedelta64_dtype(dtype):
1231-
# TODO: _from_sequence would raise ValueError in cases where
1232-
# _ensure_nanosecond_dtype raises TypeError
1233-
# Incompatible types in assignment (expression has type "Union[dtype[Any],
1234-
# ExtensionDtype]", variable has type "Optional[dtype[Any]]")
1235-
dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment]
12361198
res = TimedeltaArray._from_sequence(value, dtype=dtype)
12371199
return res
1238-
12391200
else:
1240-
# error: Incompatible types in assignment (expression has type
1241-
# "Union[dtype[Any], ExtensionDtype]", variable has type "Optional[dtype[Any]]")
1242-
dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment]
1243-
12441201
try:
12451202
dta = DatetimeArray._from_sequence(value, dtype=dtype)
12461203
except ValueError as err:
@@ -1838,8 +1795,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
18381795
if isinstance(element, np.ndarray):
18391796
# e.g. TestDataFrameIndexingWhere::test_where_alignment
18401797
casted = element.astype(dtype)
1841-
# TODO(np>=1.20): we can just use np.array_equal with equal_nan
1842-
if array_equivalent(casted, element):
1798+
if np.array_equal(casted, element, equal_nan=True):
18431799
return casted
18441800
raise LossySetitemError
18451801

pandas/core/indexes/base.py

Lines changed: 15 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -206,22 +206,6 @@
206206
_dtype_obj = np.dtype("object")
207207

208208

209-
def _wrapped_sanitize(cls, data, dtype: DtypeObj | None, copy: bool):
210-
"""
211-
Call sanitize_array with wrapping for differences between Index/Series.
212-
"""
213-
try:
214-
arr = sanitize_array(data, None, dtype=dtype, copy=copy, strict_ints=True)
215-
except ValueError as err:
216-
if "index must be specified when data is not list-like" in str(err):
217-
raise cls._raise_scalar_data_error(data) from err
218-
if "Data must be 1-dimensional" in str(err):
219-
raise ValueError("Index data must be 1-dimensional") from err
220-
raise
221-
arr = ensure_wrapped_if_datetimelike(arr)
222-
return arr
223-
224-
225209
def _maybe_return_indexers(meth: F) -> F:
226210
"""
227211
Decorator to simplify 'return_indexers' checks in Index.join.
@@ -514,7 +498,16 @@ def __new__(
514498
# Ensure we get 1-D array of tuples instead of 2D array.
515499
data = com.asarray_tuplesafe(data, dtype=_dtype_obj)
516500

517-
arr = _wrapped_sanitize(cls, data, dtype, copy)
501+
try:
502+
arr = sanitize_array(data, None, dtype=dtype, copy=copy, strict_ints=True)
503+
except ValueError as err:
504+
if "index must be specified when data is not list-like" in str(err):
505+
raise cls._raise_scalar_data_error(data) from err
506+
if "Data must be 1-dimensional" in str(err):
507+
raise ValueError("Index data must be 1-dimensional") from err
508+
raise
509+
arr = ensure_wrapped_if_datetimelike(arr)
510+
518511
klass = cls._dtype_to_subclass(arr.dtype)
519512

520513
# _ensure_array _may_ be unnecessary once Int64Index etc are gone
@@ -865,19 +858,11 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
865858
if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):
866859
return NotImplemented
867860

868-
# TODO(2.0) the 'and', 'or' and 'xor' dunder methods are currently set
869-
# operations and not logical operations, so don't dispatch
870-
# This is deprecated, so this full 'if' clause can be removed once
871-
# deprecation is enforced in 2.0
872-
if not (
873-
method == "__call__"
874-
and ufunc in (np.bitwise_and, np.bitwise_or, np.bitwise_xor)
875-
):
876-
result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
877-
self, ufunc, method, *inputs, **kwargs
878-
)
879-
if result is not NotImplemented:
880-
return result
861+
result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
862+
self, ufunc, method, *inputs, **kwargs
863+
)
864+
if result is not NotImplemented:
865+
return result
881866

882867
if "out" in kwargs:
883868
# e.g. test_dti_isub_tdi

pandas/core/indexes/datetimes.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
from pandas.core.dtypes.common import (
4343
is_datetime64_dtype,
4444
is_datetime64tz_dtype,
45-
is_dtype_equal,
4645
is_scalar,
4746
)
4847
from pandas.core.dtypes.missing import is_valid_na_for_dtype
@@ -331,18 +330,6 @@ def __new__(
331330
if copy:
332331
data = data.copy()
333332
return cls._simple_new(data, name=name)
334-
elif (
335-
isinstance(data, DatetimeArray)
336-
and freq is lib.no_default
337-
and tz is lib.no_default
338-
and is_dtype_equal(data.dtype, dtype)
339-
):
340-
# Reached via Index.__new__ when we call .astype
341-
# TODO(2.0): special casing can be removed once _from_sequence_not_strict
342-
# no longer chokes on non-nano
343-
if copy:
344-
data = data.copy()
345-
return cls._simple_new(data, name=name)
346333

347334
dtarr = DatetimeArray._from_sequence_not_strict(
348335
data,

pandas/core/indexes/timedeltas.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas._typing import DtypeObj
1313

1414
from pandas.core.dtypes.common import (
15+
is_dtype_equal,
1516
is_scalar,
1617
is_timedelta64_dtype,
1718
)
@@ -135,13 +136,21 @@ def __new__(
135136
"represent unambiguous timedelta values durations."
136137
)
137138

138-
# FIXME: need to check for dtype/data match
139-
if isinstance(data, TimedeltaArray) and freq is lib.no_default:
139+
if (
140+
isinstance(data, TimedeltaArray)
141+
and freq is lib.no_default
142+
and (dtype is None or is_dtype_equal(dtype, data.dtype))
143+
):
140144
if copy:
141145
data = data.copy()
142146
return cls._simple_new(data, name=name)
143147

144-
if isinstance(data, TimedeltaIndex) and freq is lib.no_default and name is None:
148+
if (
149+
isinstance(data, TimedeltaIndex)
150+
and freq is lib.no_default
151+
and name is None
152+
and (dtype is None or is_dtype_equal(dtype, data.dtype))
153+
):
145154
if copy:
146155
return data.copy()
147156
else:

pandas/core/internals/blocks.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,6 +1134,7 @@ def where(self, other, cond, _downcast: str | bool = "infer") -> list[Block]:
11341134

11351135
return [self.make_block(result)]
11361136

1137+
@final
11371138
def fillna(
11381139
self, value, limit: int | None = None, inplace: bool = False, downcast=None
11391140
) -> list[Block]:

0 commit comments

Comments (0)