
CLN: assorted #49850


Merged · 6 commits · Nov 23, 2022
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.0.0.rst
@@ -467,12 +467,12 @@ Removal of prior version deprecations/changes
- Disallow passing non-keyword arguments to :meth:`DataFrame.where` and :meth:`Series.where` except for ``cond`` and ``other`` (:issue:`41523`)
- Disallow passing non-keyword arguments to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` except for ``labels`` (:issue:`41491`)
- Disallow passing non-keyword arguments to :meth:`Series.rename_axis` and :meth:`DataFrame.rename_axis` except for ``mapper`` (:issue:`47587`)
- Disallow :meth:`Index.reindex` with non-unique :class:`Index` objects (:issue:`42568`)
- Disallow passing non-keyword arguments to :meth:`Series.clip` and :meth:`DataFrame.clip` (:issue:`41511`)
- Disallow passing non-keyword arguments to :meth:`Series.bfill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill` and :meth:`DataFrame.ffill` (:issue:`41508`)
- Disallow passing non-keyword arguments to :meth:`DataFrame.replace`, :meth:`Series.replace` except for ``to_replace`` and ``value`` (:issue:`47587`)
- Disallow passing non-keyword arguments to :meth:`DataFrame.sort_values` except for ``by`` (:issue:`41505`)
- Disallow passing non-keyword arguments to :meth:`Series.sort_values` (:issue:`41505`)
- Disallow :meth:`Index.reindex` with non-unique :class:`Index` objects (:issue:`42568`)
- Disallowed constructing :class:`Categorical` with scalar ``data`` (:issue:`38433`)
- Disallowed constructing :class:`CategoricalIndex` without passing ``data`` (:issue:`38944`)
- Removed :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`)
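For context, a minimal sketch of what one of the enforced deprecations listed above means in practice (illustrative calls, not part of this PR's diff):

    import pandas as pd

    df = pd.DataFrame({"a": [2, 1]})

    # `by` may still be passed positionally, but everything after it is keyword-only:
    df.sort_values("a", ascending=False)   # OK
    # df.sort_values("a", 0)               # TypeError in 2.0: positional `axis`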
20 changes: 4 additions & 16 deletions pandas/core/arrays/datetimes.py
@@ -319,7 +319,7 @@ def _from_sequence_not_strict(

dtype = _validate_dt64_dtype(dtype)
# if dtype has an embedded tz, capture it
tz = validate_tz_from_dtype(dtype, tz, explicit_tz_none)
tz = _validate_tz_from_dtype(dtype, tz, explicit_tz_none)

unit = None
if dtype is not None:
@@ -338,7 +338,7 @@ def _from_sequence_not_strict(
ambiguous=ambiguous,
)
# We have to call this again after possibly inferring a tz above
validate_tz_from_dtype(dtype, tz, explicit_tz_none)
_validate_tz_from_dtype(dtype, tz, explicit_tz_none)
if tz is not None and explicit_tz_none:
raise ValueError(
"Passed data is timezone-aware, incompatible with 'tz=None'. "
@@ -1953,18 +1953,6 @@ def std(
# Constructor Helpers


def sequence_to_datetimes(data) -> DatetimeArray:
"""
Parse/convert the passed data to either DatetimeArray or np.ndarray[object].
"""
result, tz, freq = _sequence_to_dt64ns(data)

unit = np.datetime_data(result.dtype)[0]
dtype = tz_to_dtype(tz, unit)
dta = DatetimeArray._simple_new(result, freq=freq, dtype=dtype)
return dta


def _sequence_to_dt64ns(
data,
*,
@@ -2303,7 +2291,7 @@ def _validate_dt64_dtype(dtype):

Notes
-----
Unlike validate_tz_from_dtype, this does _not_ allow non-existent
Unlike _validate_tz_from_dtype, this does _not_ allow non-existent
tz errors to go through
"""
if dtype is not None:
@@ -2338,7 +2326,7 @@ def _validate_dt64_dtype(dtype):
return dtype


def validate_tz_from_dtype(
def _validate_tz_from_dtype(
dtype, tz: tzinfo | None, explicit_tz_none: bool = False
) -> tzinfo | None:
"""
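The `explicit_tz_none` flag validated here (twice, since a tz can be inferred in between) is set when a caller passes `tz=None` explicitly; a sketch of the user-facing effect, assuming the index constructor forwards an explicit `tz=None` to this validator:

    import pandas as pd

    # tz-aware data combined with an explicit `tz=None` is rejected:
    pd.DatetimeIndex(["2022-11-23 00:00:00+00:00"], tz=None)
    # ValueError: Passed data is timezone-aware, incompatible with 'tz=None'. ...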
6 changes: 1 addition & 5 deletions pandas/core/arrays/sparse/array.py
@@ -1884,11 +1884,7 @@ def make_sparse(
index = make_sparse_index(length, indices, kind)
sparsified_values = arr[mask]
if dtype is not None:
# error: Argument "dtype" to "astype_nansafe" has incompatible type "Union[str,
# dtype[Any]]"; expected "Union[dtype[Any], ExtensionDtype]"
sparsified_values = astype_nansafe(
sparsified_values, dtype=dtype # type: ignore[arg-type]
)
sparsified_values = astype_nansafe(sparsified_values, dtype=pandas_dtype(dtype))
# TODO: copy
return sparsified_values, index, fill_value

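Passing the dtype through `pandas_dtype` is what lets the `type: ignore` go: it normalizes the `Union[str, dtype]` into an actual dtype object. A quick sketch of the normalization:

    import numpy as np
    from pandas.core.dtypes.common import pandas_dtype

    pandas_dtype("float64")    # dtype('float64')
    pandas_dtype(np.float64)   # dtype('float64')
    pandas_dtype("category")   # CategoricalDtype (an ExtensionDtype)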
3 changes: 2 additions & 1 deletion pandas/core/arrays/sparse/dtype.py
@@ -354,7 +354,8 @@ def update_dtype(self, dtype) -> SparseDtype:
if not isinstance(dtype, np.dtype):
raise TypeError("sparse arrays of extension dtypes not supported")

fvarr = astype_nansafe(np.array(self.fill_value), dtype)
fv_asarray = np.atleast_1d(np.array(self.fill_value))
fvarr = astype_nansafe(fv_asarray, dtype)
# NB: not fv_0d.item(), as that casts dt64->int
fill_value = fvarr[0]
dtype = cls(dtype, fill_value=fill_value)
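Since `astype_nansafe` no longer promotes 0-dim input itself (see the `pandas/core/dtypes/astype.py` hunk below), this caller now wraps the fill value with `np.atleast_1d`. A sketch of why indexing into the 1-D result beats `.item()`, per the NB comment:

    import numpy as np

    fv_0d = np.array(np.datetime64("2022-11-23", "ns"))  # 0-dim array
    fv_1d = np.atleast_1d(fv_0d)                         # shape (1,)

    fv_1d[0]      # np.datetime64 scalar, dtype preserved
    fv_0d.item()  # plain int of nanoseconds -- the dt64->int cast being avoided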
8 changes: 6 additions & 2 deletions pandas/core/construction.py
@@ -40,7 +40,7 @@
maybe_cast_to_integer_array,
maybe_convert_platform,
maybe_infer_to_datetimelike,
maybe_upcast,
maybe_promote,
)
from pandas.core.dtypes.common import (
is_datetime64_ns_dtype,
@@ -484,7 +484,11 @@ def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray:
"""
mask = ma.getmaskarray(data)
if mask.any():
data, fill_value = maybe_upcast(data, copy=True)
dtype, fill_value = maybe_promote(data.dtype, np.nan)
dtype = cast(np.dtype, dtype)
# Incompatible types in assignment (expression has type "ndarray[Any,
# dtype[Any]]", variable has type "MaskedArray[Any, Any]")
data = data.astype(dtype, copy=True) # type: ignore[assignment]
data.soften_mask() # set hardmask False if it was True
data[mask] = fill_value
else:
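`maybe_promote` is the half of the removed `maybe_upcast` helper that decides the target dtype; the `astype` call is now inlined at the call site. A minimal sketch of what it returns for the masked-integer case:

    import numpy as np
    from pandas.core.dtypes.cast import maybe_promote

    # int64 cannot hold NaN, so the dtype is promoted
    dtype, fill_value = maybe_promote(np.dtype("int64"), np.nan)
    dtype       # dtype('float64')
    fill_value  # nan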
3 changes: 0 additions & 3 deletions pandas/core/dtypes/astype.py
@@ -78,9 +78,6 @@ def astype_nansafe(
The dtype was a datetime64/timedelta64 dtype, but it had no unit.
"""

# We get here with 0-dim from sparse
arr = np.atleast_1d(arr)

# dispatch on extension dtype if needed
if isinstance(dtype, ExtensionDtype):
return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy)
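With the `np.atleast_1d` call removed, `astype_nansafe` now assumes at-least-1-D input, and the sparse caller above does the promotion itself. A sketch of the resulting calling convention (internal API, subject to change):

    import numpy as np
    from pandas.core.dtypes.astype import astype_nansafe

    arr = np.atleast_1d(np.array(1.5))      # callers promote 0-dim input first
    astype_nansafe(arr, np.dtype("int64"))  # array([1])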
60 changes: 8 additions & 52 deletions pandas/core/dtypes/cast.py
@@ -84,7 +84,6 @@
)
from pandas.core.dtypes.inference import is_list_like
from pandas.core.dtypes.missing import (
array_equivalent,
is_valid_na_for_dtype,
isna,
na_value_for_dtype,
@@ -404,7 +403,7 @@ def trans(x):
elif dtype.kind == result.dtype.kind == "c":
new_result = result.astype(dtype)

if array_equivalent(new_result, result):
if np.array_equal(new_result, result, equal_nan=True):
# TODO: use tolerance like we do for float?
return new_result

@@ -543,10 +542,6 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan):
ValueError
If fill_value is a non-scalar and dtype is not object.
"""
# TODO(2.0): need to directly use the non-cached version as long as we
# possibly raise a deprecation warning for datetime dtype
if dtype.kind == "M":
return _maybe_promote(dtype, fill_value)
# for performance, we are using a cached version of the actual implementation
# of the function in _maybe_promote. However, this doesn't always work (in case
# of non-hashable arguments), so we fallback to the actual implementation if needed
@@ -923,40 +918,6 @@ def _maybe_infer_dtype_type(element):
return tipo


def maybe_upcast(
values: NumpyArrayT,
fill_value: Scalar = np.nan,
copy: bool = False,
) -> tuple[NumpyArrayT, Scalar]:
"""
Provide explicit type promotion and coercion.

Parameters
----------
values : np.ndarray
The array that we may want to upcast.
fill_value : what we want to fill with
copy : bool, default True
If True always make a copy even if no upcast is required.

Returns
-------
values: np.ndarray
the original array, possibly upcast
fill_value:
the fill value, possibly upcast
"""
new_dtype, fill_value = maybe_promote(values.dtype, fill_value)
# We get a copy in all cases _except_ (values.dtype == new_dtype and not copy)
upcast_values = values.astype(new_dtype, copy=copy)

# error: Incompatible return value type (got "Tuple[ndarray[Any, dtype[Any]],
# Union[Union[str, int, float, bool] Union[Period, Timestamp, Timedelta, Any]]]",
# expected "Tuple[NumpyArrayT, Union[Union[str, int, float, bool], Union[Period,
# Timestamp, Timedelta, Any]]]")
return upcast_values, fill_value # type: ignore[return-value]


def invalidate_string_dtypes(dtype_set: set[DtypeObj]) -> None:
"""
Change string like dtypes to object for
@@ -1227,20 +1188,16 @@ def maybe_cast_to_datetime(
if not is_list_like(value):
raise TypeError("value must be listlike")

# TODO: _from_sequence would raise ValueError in cases where
# _ensure_nanosecond_dtype raises TypeError
# Incompatible types in assignment (expression has type "Union[dtype[Any],
# ExtensionDtype]", variable has type "Optional[dtype[Any]]")
dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment]

if is_timedelta64_dtype(dtype):
# TODO: _from_sequence would raise ValueError in cases where
# _ensure_nanosecond_dtype raises TypeError
# Incompatible types in assignment (expression has type "Union[dtype[Any],
# ExtensionDtype]", variable has type "Optional[dtype[Any]]")
dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment]
res = TimedeltaArray._from_sequence(value, dtype=dtype)
return res

else:
# error: Incompatible types in assignment (expression has type
# "Union[dtype[Any], ExtensionDtype]", variable has type "Optional[dtype[Any]]")
dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment]

try:
dta = DatetimeArray._from_sequence(value, dtype=dtype)
except ValueError as err:
@@ -1838,8 +1795,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
if isinstance(element, np.ndarray):
# e.g. TestDataFrameIndexingWhere::test_where_alignment
casted = element.astype(dtype)
# TODO(np>=1.20): we can just use np.array_equal with equal_nan
if array_equivalent(casted, element):
if np.array_equal(casted, element, equal_nan=True):
return casted
raise LossySetitemError

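Both `array_equivalent` call sites are replaced with `np.array_equal(..., equal_nan=True)`, which is what the removed `TODO(np>=1.20)` was waiting for. A quick sketch of the NaN handling:

    import numpy as np

    a = np.array([1.0, np.nan])

    np.array_equal(a, a.copy())                  # False: NaN != NaN
    np.array_equal(a, a.copy(), equal_nan=True)  # True: NaNs compare equal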
45 changes: 15 additions & 30 deletions pandas/core/indexes/base.py
@@ -206,22 +206,6 @@
_dtype_obj = np.dtype("object")


def _wrapped_sanitize(cls, data, dtype: DtypeObj | None, copy: bool):
"""
Call sanitize_array with wrapping for differences between Index/Series.
"""
try:
arr = sanitize_array(data, None, dtype=dtype, copy=copy, strict_ints=True)
except ValueError as err:
if "index must be specified when data is not list-like" in str(err):
raise cls._raise_scalar_data_error(data) from err
if "Data must be 1-dimensional" in str(err):
raise ValueError("Index data must be 1-dimensional") from err
raise
arr = ensure_wrapped_if_datetimelike(arr)
return arr


def _maybe_return_indexers(meth: F) -> F:
"""
Decorator to simplify 'return_indexers' checks in Index.join.
@@ -514,7 +498,16 @@ def __new__(
# Ensure we get 1-D array of tuples instead of 2D array.
data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

arr = _wrapped_sanitize(cls, data, dtype, copy)
try:
arr = sanitize_array(data, None, dtype=dtype, copy=copy, strict_ints=True)
except ValueError as err:
if "index must be specified when data is not list-like" in str(err):
raise cls._raise_scalar_data_error(data) from err
if "Data must be 1-dimensional" in str(err):
raise ValueError("Index data must be 1-dimensional") from err
raise
arr = ensure_wrapped_if_datetimelike(arr)

klass = cls._dtype_to_subclass(arr.dtype)

# _ensure_array _may_ be unnecessary once Int64Index etc are gone
@@ -865,19 +858,11 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):
return NotImplemented

# TODO(2.0) the 'and', 'or' and 'xor' dunder methods are currently set
# operations and not logical operations, so don't dispatch
# This is deprecated, so this full 'if' clause can be removed once
# deprecation is enforced in 2.0
if not (
method == "__call__"
and ufunc in (np.bitwise_and, np.bitwise_or, np.bitwise_xor)
):
result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
self, ufunc, method, *inputs, **kwargs
)
if result is not NotImplemented:
return result
result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
self, ufunc, method, *inputs, **kwargs
)
if result is not NotImplemented:
return result

if "out" in kwargs:
# e.g. test_dti_isub_tdi
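The inlined try/except is the error translation formerly hidden in `_wrapped_sanitize`: it rewrites `sanitize_array`'s generic messages into Index-specific ones. A sketch of the user-facing errors (message text abridged):

    import numpy as np
    import pandas as pd

    pd.Index(np.ones((2, 2)))
    # ValueError: Index data must be 1-dimensional

    pd.Index(1)
    # TypeError: Index(...) must be called with a collection of some kind, ...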
13 changes: 0 additions & 13 deletions pandas/core/indexes/datetimes.py
@@ -42,7 +42,6 @@
from pandas.core.dtypes.common import (
is_datetime64_dtype,
is_datetime64tz_dtype,
is_dtype_equal,
is_scalar,
)
from pandas.core.dtypes.missing import is_valid_na_for_dtype
@@ -331,18 +330,6 @@ def __new__(
if copy:
data = data.copy()
return cls._simple_new(data, name=name)
elif (
isinstance(data, DatetimeArray)
and freq is lib.no_default
and tz is lib.no_default
and is_dtype_equal(data.dtype, dtype)
):
# Reached via Index.__new__ when we call .astype
# TODO(2.0): special casing can be removed once _from_sequence_not_strict
# no longer chokes on non-nano
if copy:
data = data.copy()
return cls._simple_new(data, name=name)

dtarr = DatetimeArray._from_sequence_not_strict(
data,
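The removed branch was a fastpath needed only while `_from_sequence_not_strict` choked on non-nano data, per the old TODO. Assuming that path now accepts other units, the ordinary constructor covers the `.astype` round-trip, roughly:

    import numpy as np
    import pandas as pd

    # non-nano input now flows through the main constructor path
    dti = pd.DatetimeIndex(np.array(["2022-11-23"], dtype="datetime64[s]"))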
15 changes: 12 additions & 3 deletions pandas/core/indexes/timedeltas.py
@@ -12,6 +12,7 @@
from pandas._typing import DtypeObj

from pandas.core.dtypes.common import (
is_dtype_equal,
is_scalar,
is_timedelta64_dtype,
)
@@ -135,13 +136,21 @@ def __new__(
"represent unambiguous timedelta values durations."
)

# FIXME: need to check for dtype/data match
if isinstance(data, TimedeltaArray) and freq is lib.no_default:
if (
isinstance(data, TimedeltaArray)
and freq is lib.no_default
and (dtype is None or is_dtype_equal(dtype, data.dtype))
):
if copy:
data = data.copy()
return cls._simple_new(data, name=name)

if isinstance(data, TimedeltaIndex) and freq is lib.no_default and name is None:
if (
isinstance(data, TimedeltaIndex)
and freq is lib.no_default
and name is None
and (dtype is None or is_dtype_equal(dtype, data.dtype))
):
if copy:
return data.copy()
else:
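The new `is_dtype_equal` guards resolve the old FIXME: the fastpaths are only taken when the requested dtype actually matches the data's dtype. `is_dtype_equal` accepts strings and dtype objects alike, e.g.:

    import numpy as np
    from pandas.core.dtypes.common import is_dtype_equal

    is_dtype_equal(np.dtype("m8[ns]"), "timedelta64[ns]")  # True
    is_dtype_equal(np.dtype("m8[ns]"), np.dtype("m8[s]"))  # False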
1 change: 1 addition & 0 deletions pandas/core/internals/blocks.py
@@ -1134,6 +1134,7 @@ def where(self, other, cond, _downcast: str | bool = "infer") -> list[Block]:

return [self.make_block(result)]

@final
def fillna(
self, value, limit: int | None = None, inplace: bool = False, downcast=None
) -> list[Block]:
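The added `@final` is typing's decorator: a checker-level promise that no `Block` subclass overrides `fillna` (it has no runtime effect). An illustrative sketch with hypothetical classes, not pandas code:

    from typing import final

    class Base:
        @final
        def fillna(self) -> None:  # subclasses must not override
            ...

    class Child(Base):
        def fillna(self) -> None:  # mypy: Cannot override final attribute "fillna"
            ...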