
CLN: assorted #49850


Merged · 6 commits · Nov 23, 2022
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.0.0.rst
@@ -467,12 +467,12 @@ Removal of prior version deprecations/changes
- Disallow passing non-keyword arguments to :meth:`DataFrame.where` and :meth:`Series.where` except for ``cond`` and ``other`` (:issue:`41523`)
- Disallow passing non-keyword arguments to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` except for ``labels`` (:issue:`41491`)
- Disallow passing non-keyword arguments to :meth:`Series.rename_axis` and :meth:`DataFrame.rename_axis` except for ``mapper`` (:issue:`47587`)
- Disallow :meth:`Index.reindex` with non-unique :class:`Index` objects (:issue:`42568`)
- Disallow passing non-keyword arguments to :meth:`Series.clip` and :meth:`DataFrame.clip` (:issue:`41511`)
- Disallow passing non-keyword arguments to :meth:`Series.bfill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill` and :meth:`DataFrame.ffill` (:issue:`41508`)
- Disallow passing non-keyword arguments to :meth:`DataFrame.replace`, :meth:`Series.replace` except for ``to_replace`` and ``value`` (:issue:`47587`)
- Disallow passing non-keyword arguments to :meth:`DataFrame.sort_values` except for ``by`` (:issue:`41505`)
- Disallow passing non-keyword arguments to :meth:`Series.sort_values` (:issue:`41505`)
- Disallow :meth:`Index.reindex` with non-unique :class:`Index` objects (:issue:`42568`)
- Disallowed constructing :class:`Categorical` with scalar ``data`` (:issue:`38433`)
- Disallowed constructing :class:`CategoricalIndex` without passing ``data`` (:issue:`38944`)
- Removed :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`)
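For context, a minimal sketch of what one of the enforced deprecations listed above means in practice (illustrative calls, not part of this PR's diff):

    import pandas as pd

    df = pd.DataFrame({"a": [2, 1]})

    # `by` may still be passed positionally, but everything after it is keyword-only:
    df.sort_values("a", ascending=False)   # OK
    # df.sort_values("a", 0)               # TypeError in 2.0: positional `axis`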
20 changes: 4 additions & 16 deletions pandas/core/arrays/datetimes.py
@@ -319,7 +319,7 @@ def _from_sequence_not_strict(

dtype = _validate_dt64_dtype(dtype)
# if dtype has an embedded tz, capture it
tz = validate_tz_from_dtype(dtype, tz, explicit_tz_none)
tz = _validate_tz_from_dtype(dtype, tz, explicit_tz_none)

unit = None
if dtype is not None:
@@ -338,7 +338,7 @@ def _from_sequence_not_strict(
ambiguous=ambiguous,
)
# We have to call this again after possibly inferring a tz above
validate_tz_from_dtype(dtype, tz, explicit_tz_none)
_validate_tz_from_dtype(dtype, tz, explicit_tz_none)
if tz is not None and explicit_tz_none:
raise ValueError(
"Passed data is timezone-aware, incompatible with 'tz=None'. "
@@ -1953,18 +1953,6 @@ def std(
# Constructor Helpers


def sequence_to_datetimes(data) -> DatetimeArray:
"""
Parse/convert the passed data to either DatetimeArray or np.ndarray[object].
"""
result, tz, freq = _sequence_to_dt64ns(data)

unit = np.datetime_data(result.dtype)[0]
dtype = tz_to_dtype(tz, unit)
dta = DatetimeArray._simple_new(result, freq=freq, dtype=dtype)
return dta


def _sequence_to_dt64ns(
data,
*,
@@ -2303,7 +2291,7 @@ def _validate_dt64_dtype(dtype):

Notes
-----
Unlike validate_tz_from_dtype, this does _not_ allow non-existent
Unlike _validate_tz_from_dtype, this does _not_ allow non-existent
tz errors to go through
"""
if dtype is not None:
@@ -2338,7 +2326,7 @@ def _validate_dt64_dtype(dtype):
return dtype


def validate_tz_from_dtype(
def _validate_tz_from_dtype(
dtype, tz: tzinfo | None, explicit_tz_none: bool = False
) -> tzinfo | None:
"""
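The `explicit_tz_none` flag validated here (twice, since a tz can be inferred in between) is set when a caller passes `tz=None` explicitly; a sketch of the user-facing effect, assuming the index constructor forwards an explicit `tz=None` to this validator:

    import pandas as pd

    # tz-aware data combined with an explicit `tz=None` is rejected:
    pd.DatetimeIndex(["2022-11-23 00:00:00+00:00"], tz=None)
    # ValueError: Passed data is timezone-aware, incompatible with 'tz=None'. ...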
6 changes: 1 addition & 5 deletions pandas/core/arrays/sparse/array.py
@@ -1884,11 +1884,7 @@ def make_sparse(
index = make_sparse_index(length, indices, kind)
sparsified_values = arr[mask]
if dtype is not None:
# error: Argument "dtype" to "astype_nansafe" has incompatible type "Union[str,
# dtype[Any]]"; expected "Union[dtype[Any], ExtensionDtype]"
sparsified_values = astype_nansafe(
sparsified_values, dtype=dtype # type: ignore[arg-type]
)
sparsified_values = astype_nansafe(sparsified_values, dtype=pandas_dtype(dtype))
# TODO: copy
return sparsified_values, index, fill_value

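Passing the dtype through `pandas_dtype` is what lets the `type: ignore` go: it normalizes the `Union[str, dtype]` into an actual dtype object. A quick sketch of the normalization:

    import numpy as np
    from pandas.core.dtypes.common import pandas_dtype

    pandas_dtype("float64")    # dtype('float64')
    pandas_dtype(np.float64)   # dtype('float64')
    pandas_dtype("category")   # CategoricalDtype (an ExtensionDtype)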
3 changes: 2 additions & 1 deletion pandas/core/arrays/sparse/dtype.py
@@ -354,7 +354,8 @@ def update_dtype(self, dtype) -> SparseDtype:
if not isinstance(dtype, np.dtype):
raise TypeError("sparse arrays of extension dtypes not supported")

fvarr = astype_nansafe(np.array(self.fill_value), dtype)
fv_asarray = np.atleast_1d(np.array(self.fill_value))
fvarr = astype_nansafe(fv_asarray, dtype)
# NB: not fv_0d.item(), as that casts dt64->int
fill_value = fvarr[0]
dtype = cls(dtype, fill_value=fill_value)
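Since `astype_nansafe` no longer promotes 0-dim input itself (see the `pandas/core/dtypes/astype.py` hunk below), this caller now wraps the fill value with `np.atleast_1d`. A sketch of why indexing into the 1-D result beats `.item()`, per the NB comment:

    import numpy as np

    fv_0d = np.array(np.datetime64("2022-11-23", "ns"))  # 0-dim array
    fv_1d = np.atleast_1d(fv_0d)                         # shape (1,)

    fv_1d[0]      # np.datetime64 scalar, dtype preserved
    fv_0d.item()  # plain int of nanoseconds -- the dt64->int cast being avoided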
8 changes: 6 additions & 2 deletions pandas/core/construction.py
@@ -40,7 +40,7 @@
maybe_cast_to_integer_array,
maybe_convert_platform,
maybe_infer_to_datetimelike,
maybe_upcast,
maybe_promote,
)
from pandas.core.dtypes.common import (
is_datetime64_ns_dtype,
@@ -484,7 +484,11 @@ def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray:
"""
mask = ma.getmaskarray(data)
if mask.any():
data, fill_value = maybe_upcast(data, copy=True)
dtype, fill_value = maybe_promote(data.dtype, np.nan)
dtype = cast(np.dtype, dtype)
# Incompatible types in assignment (expression has type "ndarray[Any,
# dtype[Any]]", variable has type "MaskedArray[Any, Any]")
data = data.astype(dtype, copy=True) # type: ignore[assignment]
data.soften_mask() # set hardmask False if it was True
data[mask] = fill_value
else:
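`maybe_promote` is the half of the removed `maybe_upcast` helper that decides the target dtype; the `astype` call is now inlined at the call site. A minimal sketch of what it returns for the masked-integer case:

    import numpy as np
    from pandas.core.dtypes.cast import maybe_promote

    # int64 cannot hold NaN, so the dtype is promoted
    dtype, fill_value = maybe_promote(np.dtype("int64"), np.nan)
    dtype       # dtype('float64')
    fill_value  # nan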
3 changes: 0 additions & 3 deletions pandas/core/dtypes/astype.py
@@ -78,9 +78,6 @@ def astype_nansafe(
The dtype was a datetime64/timedelta64 dtype, but it had no unit.
"""

# We get here with 0-dim from sparse
arr = np.atleast_1d(arr)

# dispatch on extension dtype if needed
if isinstance(dtype, ExtensionDtype):
return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy)
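With the `np.atleast_1d` call removed, `astype_nansafe` now assumes at-least-1-D input, and the sparse caller above does the promotion itself. A sketch of the resulting calling convention (internal API, subject to change):

    import numpy as np
    from pandas.core.dtypes.astype import astype_nansafe

    arr = np.atleast_1d(np.array(1.5))      # callers promote 0-dim input first
    astype_nansafe(arr, np.dtype("int64"))  # array([1])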
60 changes: 8 additions & 52 deletions pandas/core/dtypes/cast.py
@@ -84,7 +84,6 @@
)
from pandas.core.dtypes.inference import is_list_like
from pandas.core.dtypes.missing import (
array_equivalent,
is_valid_na_for_dtype,
isna,
na_value_for_dtype,
@@ -404,7 +403,7 @@ def trans(x):
elif dtype.kind == result.dtype.kind == "c":
new_result = result.astype(dtype)

if array_equivalent(new_result, result):
if np.array_equal(new_result, result, equal_nan=True):
# TODO: use tolerance like we do for float?
return new_result

@@ -543,10 +542,6 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan):
ValueError
If fill_value is a non-scalar and dtype is not object.
"""
# TODO(2.0): need to directly use the non-cached version as long as we
# possibly raise a deprecation warning for datetime dtype
if dtype.kind == "M":
return _maybe_promote(dtype, fill_value)
# for performance, we are using a cached version of the actual implementation
# of the function in _maybe_promote. However, this doesn't always work (in case
# of non-hashable arguments), so we fallback to the actual implementation if needed
@@ -923,40 +918,6 @@ def _maybe_infer_dtype_type(element):
return tipo


def maybe_upcast(
values: NumpyArrayT,
fill_value: Scalar = np.nan,
copy: bool = False,
) -> tuple[NumpyArrayT, Scalar]:
"""
Provide explicit type promotion and coercion.

Parameters
----------
values : np.ndarray
The array that we may want to upcast.
fill_value : what we want to fill with
copy : bool, default True
If True always make a copy even if no upcast is required.

Returns
-------
values: np.ndarray
the original array, possibly upcast
fill_value:
the fill value, possibly upcast
"""
new_dtype, fill_value = maybe_promote(values.dtype, fill_value)
# We get a copy in all cases _except_ (values.dtype == new_dtype and not copy)
upcast_values = values.astype(new_dtype, copy=copy)

# error: Incompatible return value type (got "Tuple[ndarray[Any, dtype[Any]],
# Union[Union[str, int, float, bool] Union[Period, Timestamp, Timedelta, Any]]]",
# expected "Tuple[NumpyArrayT, Union[Union[str, int, float, bool], Union[Period,
# Timestamp, Timedelta, Any]]]")
return upcast_values, fill_value # type: ignore[return-value]


def invalidate_string_dtypes(dtype_set: set[DtypeObj]) -> None:
"""
Change string like dtypes to object for
@@ -1227,20 +1188,16 @@ def maybe_cast_to_datetime(
if not is_list_like(value):
raise TypeError("value must be listlike")

# TODO: _from_sequence would raise ValueError in cases where
# _ensure_nanosecond_dtype raises TypeError
# Incompatible types in assignment (expression has type "Union[dtype[Any],
# ExtensionDtype]", variable has type "Optional[dtype[Any]]")
dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment]

if is_timedelta64_dtype(dtype):
# TODO: _from_sequence would raise ValueError in cases where
# _ensure_nanosecond_dtype raises TypeError
# Incompatible types in assignment (expression has type "Union[dtype[Any],
# ExtensionDtype]", variable has type "Optional[dtype[Any]]")
dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment]
res = TimedeltaArray._from_sequence(value, dtype=dtype)
return res

else:
# error: Incompatible types in assignment (expression has type
# "Union[dtype[Any], ExtensionDtype]", variable has type "Optional[dtype[Any]]")
dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment]

try:
dta = DatetimeArray._from_sequence(value, dtype=dtype)
except ValueError as err:
@@ -1838,8 +1795,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
if isinstance(element, np.ndarray):
# e.g. TestDataFrameIndexingWhere::test_where_alignment
casted = element.astype(dtype)
# TODO(np>=1.20): we can just use np.array_equal with equal_nan
if array_equivalent(casted, element):
if np.array_equal(casted, element, equal_nan=True):
return casted
raise LossySetitemError

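Both `array_equivalent` call sites are replaced with `np.array_equal(..., equal_nan=True)`, which is what the removed `TODO(np>=1.20)` was waiting for. A quick sketch of the NaN handling:

    import numpy as np

    a = np.array([1.0, np.nan])

    np.array_equal(a, a.copy())                  # False: NaN != NaN
    np.array_equal(a, a.copy(), equal_nan=True)  # True: NaNs compare equal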
45 changes: 15 additions & 30 deletions pandas/core/indexes/base.py
@@ -206,22 +206,6 @@
_dtype_obj = np.dtype("object")


def _wrapped_sanitize(cls, data, dtype: DtypeObj | None, copy: bool):
"""
Call sanitize_array with wrapping for differences between Index/Series.
"""
try:
arr = sanitize_array(data, None, dtype=dtype, copy=copy, strict_ints=True)
except ValueError as err:
if "index must be specified when data is not list-like" in str(err):
raise cls._raise_scalar_data_error(data) from err
if "Data must be 1-dimensional" in str(err):
raise ValueError("Index data must be 1-dimensional") from err
raise
arr = ensure_wrapped_if_datetimelike(arr)
return arr


def _maybe_return_indexers(meth: F) -> F:
"""
Decorator to simplify 'return_indexers' checks in Index.join.
@@ -514,7 +498,16 @@ def __new__(
# Ensure we get 1-D array of tuples instead of 2D array.
data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

arr = _wrapped_sanitize(cls, data, dtype, copy)
try:
arr = sanitize_array(data, None, dtype=dtype, copy=copy, strict_ints=True)
except ValueError as err:
if "index must be specified when data is not list-like" in str(err):
raise cls._raise_scalar_data_error(data) from err
if "Data must be 1-dimensional" in str(err):
raise ValueError("Index data must be 1-dimensional") from err
raise
arr = ensure_wrapped_if_datetimelike(arr)

klass = cls._dtype_to_subclass(arr.dtype)

# _ensure_array _may_ be unnecessary once Int64Index etc are gone
@@ -865,19 +858,11 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):
return NotImplemented

# TODO(2.0) the 'and', 'or' and 'xor' dunder methods are currently set
# operations and not logical operations, so don't dispatch
# This is deprecated, so this full 'if' clause can be removed once
# deprecation is enforced in 2.0
if not (
method == "__call__"
and ufunc in (np.bitwise_and, np.bitwise_or, np.bitwise_xor)
):
result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
self, ufunc, method, *inputs, **kwargs
)
if result is not NotImplemented:
return result
result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
self, ufunc, method, *inputs, **kwargs
)
if result is not NotImplemented:
return result

if "out" in kwargs:
# e.g. test_dti_isub_tdi
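The inlined try/except is the error translation formerly hidden in `_wrapped_sanitize`: it rewrites `sanitize_array`'s generic messages into Index-specific ones. A sketch of the user-facing errors (message text abridged):

    import numpy as np
    import pandas as pd

    pd.Index(np.ones((2, 2)))
    # ValueError: Index data must be 1-dimensional

    pd.Index(1)
    # TypeError: Index(...) must be called with a collection of some kind, ...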
13 changes: 0 additions & 13 deletions pandas/core/indexes/datetimes.py
@@ -42,7 +42,6 @@
from pandas.core.dtypes.common import (
is_datetime64_dtype,
is_datetime64tz_dtype,
is_dtype_equal,
is_scalar,
)
from pandas.core.dtypes.missing import is_valid_na_for_dtype
@@ -331,18 +330,6 @@ def __new__(
if copy:
data = data.copy()
return cls._simple_new(data, name=name)
elif (
isinstance(data, DatetimeArray)
and freq is lib.no_default
and tz is lib.no_default
and is_dtype_equal(data.dtype, dtype)
):
# Reached via Index.__new__ when we call .astype
# TODO(2.0): special casing can be removed once _from_sequence_not_strict
# no longer chokes on non-nano
if copy:
data = data.copy()
return cls._simple_new(data, name=name)

dtarr = DatetimeArray._from_sequence_not_strict(
data,
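The removed branch was a fastpath needed only while `_from_sequence_not_strict` choked on non-nano data, per the old TODO. Assuming that path now accepts other units, the ordinary constructor covers the `.astype` round-trip, roughly:

    import numpy as np
    import pandas as pd

    # non-nano input now flows through the main constructor path
    dti = pd.DatetimeIndex(np.array(["2022-11-23"], dtype="datetime64[s]"))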
15 changes: 12 additions & 3 deletions pandas/core/indexes/timedeltas.py
@@ -12,6 +12,7 @@
from pandas._typing import DtypeObj

from pandas.core.dtypes.common import (
is_dtype_equal,
is_scalar,
is_timedelta64_dtype,
)
@@ -135,13 +136,21 @@ def __new__(
"represent unambiguous timedelta values durations."
)

# FIXME: need to check for dtype/data match
if isinstance(data, TimedeltaArray) and freq is lib.no_default:
if (
isinstance(data, TimedeltaArray)
and freq is lib.no_default
and (dtype is None or is_dtype_equal(dtype, data.dtype))
):
if copy:
data = data.copy()
return cls._simple_new(data, name=name)

if isinstance(data, TimedeltaIndex) and freq is lib.no_default and name is None:
if (
isinstance(data, TimedeltaIndex)
and freq is lib.no_default
and name is None
and (dtype is None or is_dtype_equal(dtype, data.dtype))
):
if copy:
return data.copy()
else:
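The new `is_dtype_equal` guards resolve the old FIXME: the fastpaths are only taken when the requested dtype actually matches the data's dtype. `is_dtype_equal` accepts strings and dtype objects alike, e.g.:

    import numpy as np
    from pandas.core.dtypes.common import is_dtype_equal

    is_dtype_equal(np.dtype("m8[ns]"), "timedelta64[ns]")  # True
    is_dtype_equal(np.dtype("m8[ns]"), np.dtype("m8[s]"))  # False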
1 change: 1 addition & 0 deletions pandas/core/internals/blocks.py
@@ -1134,6 +1134,7 @@ def where(self, other, cond, _downcast: str | bool = "infer") -> list[Block]:

return [self.make_block(result)]

@final
def fillna(
self, value, limit: int | None = None, inplace: bool = False, downcast=None
) -> list[Block]:
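The added `@final` is typing's decorator: a checker-level promise that no `Block` subclass overrides `fillna` (it has no runtime effect). An illustrative sketch with hypothetical classes, not pandas code:

    from typing import final

    class Base:
        @final
        def fillna(self) -> None:  # subclasses must not override
            ...

    class Child(Base):
        def fillna(self) -> None:  # mypy: Cannot override final attribute "fillna"
            ...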