From a542cb2700f2a4d52e118975a3b76c800062e504 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 22 Nov 2022 16:54:08 -0800 Subject: [PATCH 1/5] CLN: assorted --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/_libs/tslibs/vectorized.pyx | 7 --- pandas/core/arrays/datetimes.py | 20 ++----- pandas/core/arrays/sparse/array.py | 6 +- pandas/core/arrays/sparse/dtype.py | 3 +- pandas/core/construction.py | 5 +- pandas/core/dtypes/astype.py | 3 - pandas/core/dtypes/cast.py | 60 +++---------------- pandas/core/indexes/base.py | 45 +++++--------- pandas/core/indexes/datetimes.py | 13 ---- pandas/core/indexes/timedeltas.py | 15 ++++- pandas/core/internals/blocks.py | 1 + pandas/core/reshape/encoding.py | 10 ++-- pandas/io/pytables.py | 9 +-- pandas/tests/extension/base/dtype.py | 23 ++----- pandas/tests/frame/test_constructors.py | 13 +--- pandas/tests/groupby/test_function.py | 9 +++ .../tests/groupby/transform/test_transform.py | 3 +- .../indexes/datetimes/methods/test_snap.py | 16 +---- pandas/tests/indexing/test_loc.py | 1 - pandas/tests/io/test_stata.py | 10 +--- pandas/tseries/frequencies.py | 21 +++++-- pandas/util/_test_decorators.py | 16 ++--- 23 files changed, 96 insertions(+), 215 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 4d5c676e941ea..0a30ee7b8586f 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -466,12 +466,12 @@ Removal of prior version deprecations/changes - Disallow passing non-keyword arguments to :meth:`DataFrame.where` and :meth:`Series.where` except for ``cond`` and ``other`` (:issue:`41523`) - Disallow passing non-keyword arguments to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` except for ``labels`` (:issue:`41491`) - Disallow passing non-keyword arguments to :meth:`Series.rename_axis` and :meth:`DataFrame.rename_axis` except for ``mapper`` (:issue:`47587`) -- Disallow :meth:`Index.reindex` with non-unique :class:`Index` objects (:issue:`42568`) - Disallow passing non-keyword arguments to :meth:`Series.clip` and :meth:`DataFrame.clip` (:issue:`41511`) - Disallow passing non-keyword arguments to :meth:`Series.bfill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill` and :meth:`DataFrame.ffill` (:issue:`41508`) - Disallow passing non-keyword arguments to :meth:`DataFrame.replace`, :meth:`Series.replace` except for ``to_replace`` and ``value`` (:issue:`47587`) - Disallow passing non-keyword arguments to :meth:`DataFrame.sort_values` except for ``by`` (:issue:`41505`) - Disallow passing non-keyword arguments to :meth:`Series.sort_values` (:issue:`41505`) +- Disallow :meth:`Index.reindex` with non-unique :class:`Index` objects (:issue:`42568`) - Disallowed constructing :class:`Categorical` with scalar ``data`` (:issue:`38433`) - Disallowed constructing :class:`CategoricalIndex` without passing ``data`` (:issue:`38944`) - Removed :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`) diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index b95cebd60a847..4763ea2f6b748 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -106,13 +106,6 @@ def ints_to_pydatetime( stamps : array of i8 tz : str, optional convert to this timezone - fold : bint, default is 0 - Due to daylight saving time, one wall clock time can occur twice - when shifting from summer to winter time; fold describes whether the - datetime-like corresponds to the first (0) or the second time (1) - the wall clock hits the ambiguous time - - .. versionadded:: 1.1.0 box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' * If datetime, convert to datetime.datetime * If date, convert to datetime.date diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 4ea6b9ceee833..70656dcbb09b4 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -319,7 +319,7 @@ def _from_sequence_not_strict( dtype = _validate_dt64_dtype(dtype) # if dtype has an embedded tz, capture it - tz = validate_tz_from_dtype(dtype, tz, explicit_tz_none) + tz = _validate_tz_from_dtype(dtype, tz, explicit_tz_none) unit = None if dtype is not None: @@ -338,7 +338,7 @@ def _from_sequence_not_strict( ambiguous=ambiguous, ) # We have to call this again after possibly inferring a tz above - validate_tz_from_dtype(dtype, tz, explicit_tz_none) + _validate_tz_from_dtype(dtype, tz, explicit_tz_none) if tz is not None and explicit_tz_none: raise ValueError( "Passed data is timezone-aware, incompatible with 'tz=None'. " @@ -1953,18 +1953,6 @@ def std( # Constructor Helpers -def sequence_to_datetimes(data) -> DatetimeArray: - """ - Parse/convert the passed data to either DatetimeArray or np.ndarray[object]. - """ - result, tz, freq = _sequence_to_dt64ns(data) - - unit = np.datetime_data(result.dtype)[0] - dtype = tz_to_dtype(tz, unit) - dta = DatetimeArray._simple_new(result, freq=freq, dtype=dtype) - return dta - - def _sequence_to_dt64ns( data, *, @@ -2303,7 +2291,7 @@ def _validate_dt64_dtype(dtype): Notes ----- - Unlike validate_tz_from_dtype, this does _not_ allow non-existent + Unlike _validate_tz_from_dtype, this does _not_ allow non-existent tz errors to go through """ if dtype is not None: @@ -2338,7 +2326,7 @@ def _validate_dt64_dtype(dtype): return dtype -def validate_tz_from_dtype( +def _validate_tz_from_dtype( dtype, tz: tzinfo | None, explicit_tz_none: bool = False ) -> tzinfo | None: """ diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 114dc68d0840b..2462317321a04 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1884,11 +1884,7 @@ def make_sparse( index = make_sparse_index(length, indices, kind) sparsified_values = arr[mask] if dtype is not None: - # error: Argument "dtype" to "astype_nansafe" has incompatible type "Union[str, - # dtype[Any]]"; expected "Union[dtype[Any], ExtensionDtype]" - sparsified_values = astype_nansafe( - sparsified_values, dtype=dtype # type: ignore[arg-type] - ) + sparsified_values = astype_nansafe(sparsified_values, dtype=pandas_dtype(dtype)) # TODO: copy return sparsified_values, index, fill_value diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index eaed6257736ba..f9a0ac6d38b6d 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -354,7 +354,8 @@ def update_dtype(self, dtype) -> SparseDtype: if not isinstance(dtype, np.dtype): raise TypeError("sparse arrays of extension dtypes not supported") - fvarr = astype_nansafe(np.array(self.fill_value), dtype) + fv_asarray = np.atleast_1d(np.array(self.fill_value)) + fvarr = astype_nansafe(fv_asarray, dtype) # NB: not fv_0d.item(), as that casts dt64->int fill_value = fvarr[0] dtype = cls(dtype, fill_value=fill_value) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 8aa41e939a809..6e1524cb7df32 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -40,7 +40,7 @@ maybe_cast_to_integer_array, maybe_convert_platform, maybe_infer_to_datetimelike, - maybe_upcast, + maybe_promote, ) from pandas.core.dtypes.common import ( is_datetime64_ns_dtype, @@ -484,7 +484,8 @@ def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray: """ mask = ma.getmaskarray(data) if mask.any(): - data, fill_value = maybe_upcast(data, copy=True) + dtype, fill_value = maybe_promote(data.dtype, np.nan) + data = data.astype(dtype, copy=True) data.soften_mask() # set hardmask False if it was True data[mask] = fill_value else: diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 858882fd28f0d..7b5c77af7864b 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -78,9 +78,6 @@ def astype_nansafe( The dtype was a datetime64/timedelta64 dtype, but it had no unit. """ - # We get here with 0-dim from sparse - arr = np.atleast_1d(arr) - # dispatch on extension dtype if needed if isinstance(dtype, ExtensionDtype): return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 11c2bf8711ad1..a668dec6e073e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -84,7 +84,6 @@ ) from pandas.core.dtypes.inference import is_list_like from pandas.core.dtypes.missing import ( - array_equivalent, is_valid_na_for_dtype, isna, na_value_for_dtype, @@ -404,7 +403,7 @@ def trans(x): elif dtype.kind == result.dtype.kind == "c": new_result = result.astype(dtype) - if array_equivalent(new_result, result): + if np.array_equal(new_result, result, equal_nan=True): # TODO: use tolerance like we do for float? return new_result @@ -543,10 +542,6 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan): ValueError If fill_value is a non-scalar and dtype is not object. """ - # TODO(2.0): need to directly use the non-cached version as long as we - # possibly raise a deprecation warning for datetime dtype - if dtype.kind == "M": - return _maybe_promote(dtype, fill_value) # for performance, we are using a cached version of the actual implementation # of the function in _maybe_promote. However, this doesn't always work (in case # of non-hashable arguments), so we fallback to the actual implementation if needed @@ -923,40 +918,6 @@ def _maybe_infer_dtype_type(element): return tipo -def maybe_upcast( - values: NumpyArrayT, - fill_value: Scalar = np.nan, - copy: bool = False, -) -> tuple[NumpyArrayT, Scalar]: - """ - Provide explicit type promotion and coercion. - - Parameters - ---------- - values : np.ndarray - The array that we may want to upcast. - fill_value : what we want to fill with - copy : bool, default True - If True always make a copy even if no upcast is required. - - Returns - ------- - values: np.ndarray - the original array, possibly upcast - fill_value: - the fill value, possibly upcast - """ - new_dtype, fill_value = maybe_promote(values.dtype, fill_value) - # We get a copy in all cases _except_ (values.dtype == new_dtype and not copy) - upcast_values = values.astype(new_dtype, copy=copy) - - # error: Incompatible return value type (got "Tuple[ndarray[Any, dtype[Any]], - # Union[Union[str, int, float, bool] Union[Period, Timestamp, Timedelta, Any]]]", - # expected "Tuple[NumpyArrayT, Union[Union[str, int, float, bool], Union[Period, - # Timestamp, Timedelta, Any]]]") - return upcast_values, fill_value # type: ignore[return-value] - - def invalidate_string_dtypes(dtype_set: set[DtypeObj]) -> None: """ Change string like dtypes to object for @@ -1227,20 +1188,16 @@ def maybe_cast_to_datetime( if not is_list_like(value): raise TypeError("value must be listlike") + # TODO: _from_sequence would raise ValueError in cases where + # _ensure_nanosecond_dtype raises TypeError + # Incompatible types in assignment (expression has type "Union[dtype[Any], + # ExtensionDtype]", variable has type "Optional[dtype[Any]]") + dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment] + if is_timedelta64_dtype(dtype): - # TODO: _from_sequence would raise ValueError in cases where - # _ensure_nanosecond_dtype raises TypeError - # Incompatible types in assignment (expression has type "Union[dtype[Any], - # ExtensionDtype]", variable has type "Optional[dtype[Any]]") - dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment] res = TimedeltaArray._from_sequence(value, dtype=dtype) return res - else: - # error: Incompatible types in assignment (expression has type - # "Union[dtype[Any], ExtensionDtype]", variable has type "Optional[dtype[Any]]") - dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment] - try: dta = DatetimeArray._from_sequence(value, dtype=dtype) except ValueError as err: @@ -1838,8 +1795,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: if isinstance(element, np.ndarray): # e.g. TestDataFrameIndexingWhere::test_where_alignment casted = element.astype(dtype) - # TODO(np>=1.20): we can just use np.array_equal with equal_nan - if array_equivalent(casted, element): + if np.array_equal(casted, element, equal_nan=True): return casted raise LossySetitemError diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0bc568fb122ed..2c719dc79b268 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -206,22 +206,6 @@ _dtype_obj = np.dtype("object") -def _wrapped_sanitize(cls, data, dtype: DtypeObj | None, copy: bool): - """ - Call sanitize_array with wrapping for differences between Index/Series. - """ - try: - arr = sanitize_array(data, None, dtype=dtype, copy=copy, strict_ints=True) - except ValueError as err: - if "index must be specified when data is not list-like" in str(err): - raise cls._raise_scalar_data_error(data) from err - if "Data must be 1-dimensional" in str(err): - raise ValueError("Index data must be 1-dimensional") from err - raise - arr = ensure_wrapped_if_datetimelike(arr) - return arr - - def _maybe_return_indexers(meth: F) -> F: """ Decorator to simplify 'return_indexers' checks in Index.join. @@ -514,7 +498,16 @@ def __new__( # Ensure we get 1-D array of tuples instead of 2D array. data = com.asarray_tuplesafe(data, dtype=_dtype_obj) - arr = _wrapped_sanitize(cls, data, dtype, copy) + try: + arr = sanitize_array(data, None, dtype=dtype, copy=copy, strict_ints=True) + except ValueError as err: + if "index must be specified when data is not list-like" in str(err): + raise cls._raise_scalar_data_error(data) from err + if "Data must be 1-dimensional" in str(err): + raise ValueError("Index data must be 1-dimensional") from err + raise + arr = ensure_wrapped_if_datetimelike(arr) + klass = cls._dtype_to_subclass(arr.dtype) # _ensure_array _may_ be unnecessary once Int64Index etc are gone @@ -865,19 +858,11 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs): if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs): return NotImplemented - # TODO(2.0) the 'and', 'or' and 'xor' dunder methods are currently set - # operations and not logical operations, so don't dispatch - # This is deprecated, so this full 'if' clause can be removed once - # deprecation is enforced in 2.0 - if not ( - method == "__call__" - and ufunc in (np.bitwise_and, np.bitwise_or, np.bitwise_xor) - ): - result = arraylike.maybe_dispatch_ufunc_to_dunder_op( - self, ufunc, method, *inputs, **kwargs - ) - if result is not NotImplemented: - return result + result = arraylike.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result if "out" in kwargs: # e.g. test_dti_isub_tdi diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index b95a290a43189..5709f94e2ccd5 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -42,7 +42,6 @@ from pandas.core.dtypes.common import ( is_datetime64_dtype, is_datetime64tz_dtype, - is_dtype_equal, is_scalar, ) from pandas.core.dtypes.missing import is_valid_na_for_dtype @@ -331,18 +330,6 @@ def __new__( if copy: data = data.copy() return cls._simple_new(data, name=name) - elif ( - isinstance(data, DatetimeArray) - and freq is lib.no_default - and tz is lib.no_default - and is_dtype_equal(data.dtype, dtype) - ): - # Reached via Index.__new__ when we call .astype - # TODO(2.0): special casing can be removed once _from_sequence_not_strict - # no longer chokes on non-nano - if copy: - data = data.copy() - return cls._simple_new(data, name=name) dtarr = DatetimeArray._from_sequence_not_strict( data, diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 851ec5d54711d..82ac2bd139b1f 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -12,6 +12,7 @@ from pandas._typing import DtypeObj from pandas.core.dtypes.common import ( + is_dtype_equal, is_scalar, is_timedelta64_dtype, ) @@ -135,13 +136,21 @@ def __new__( "represent unambiguous timedelta values durations." ) - # FIXME: need to check for dtype/data match - if isinstance(data, TimedeltaArray) and freq is lib.no_default: + if ( + isinstance(data, TimedeltaArray) + and freq is lib.no_default + and (dtype is None or is_dtype_equal(dtype, data.dtype)) + ): if copy: data = data.copy() return cls._simple_new(data, name=name) - if isinstance(data, TimedeltaIndex) and freq is lib.no_default and name is None: + if ( + isinstance(data, TimedeltaIndex) + and freq is lib.no_default + and name is None + and (dtype is None or is_dtype_equal(dtype, data.dtype)) + ): if copy: return data.copy() else: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3d777f5579fff..f1856fce83160 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1134,6 +1134,7 @@ def where(self, other, cond, _downcast: str | bool = "infer") -> list[Block]: return [self.make_block(result)] + @final def fillna( self, value, limit: int | None = None, inplace: bool = False, downcast=None ) -> list[Block]: diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index fa1d6783e8f41..7e45e587ca84a 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -10,7 +10,7 @@ import numpy as np from pandas._libs.sparse import IntIndex -from pandas._typing import Dtype +from pandas._typing import NpDtype from pandas.core.dtypes.common import ( is_integer_dtype, @@ -36,7 +36,7 @@ def get_dummies( columns=None, sparse: bool = False, drop_first: bool = False, - dtype: Dtype | None = None, + dtype: NpDtype | None = None, ) -> DataFrame: """ Convert categorical variable into dummy/indicator variables. @@ -231,7 +231,7 @@ def _get_dummies_1d( dummy_na: bool = False, sparse: bool = False, drop_first: bool = False, - dtype: Dtype | None = None, + dtype: NpDtype | None = None, ) -> DataFrame: from pandas.core.reshape.concat import concat @@ -240,9 +240,7 @@ def _get_dummies_1d( if dtype is None: dtype = np.dtype(bool) - # error: Argument 1 to "dtype" has incompatible type "Union[ExtensionDtype, str, - # dtype[Any], Type[object]]"; expected "Type[Any]" - dtype = np.dtype(dtype) # type: ignore[arg-type] + dtype = np.dtype(dtype) if is_object_dtype(dtype): raise ValueError("dtype=object is not a valid dtype for get_dummies") diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 5c5e9501c111f..d1d69499fc089 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4040,16 +4040,13 @@ def get_blk_items(mgr): axis, axis_labels = new_non_index_axes[0] new_labels = Index(axis_labels).difference(Index(data_columns)) mgr = frame.reindex(new_labels, axis=axis)._mgr + mgr = cast(BlockManager, mgr) - # error: Item "ArrayManager" of "Union[ArrayManager, BlockManager]" has no - # attribute "blocks" - blocks = list(mgr.blocks) # type: ignore[union-attr] + blocks = list(mgr.blocks) blk_items = get_blk_items(mgr) for c in data_columns: mgr = frame.reindex([c], axis=axis)._mgr - # error: Item "ArrayManager" of "Union[ArrayManager, BlockManager]" has - # no attribute "blocks" - blocks.extend(mgr.blocks) # type: ignore[union-attr] + blocks.extend(mgr.blocks) blk_items.extend(get_blk_items(mgr)) # reorder the blocks in the same order as the existing table if we can diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index 32a9246264d69..1bb901a4d615e 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -1,5 +1,3 @@ -import warnings - import numpy as np import pytest @@ -71,23 +69,12 @@ def test_check_dtype(self, data): df = pd.DataFrame( {"A": pd.Series(data, dtype=dtype), "B": data, "C": "foo", "D": 1} ) + result = df.dtypes == str(dtype) - # TODO(numpy-1.20): This warnings filter and if block can be removed - # once we require numpy>=1.20 - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - result = df.dtypes == str(dtype) - # NumPy>=1.20.0, but not pandas.compat.numpy till there - # is a wheel available with this change. - try: - new_numpy_behavior = np.dtype("int64") != "Int64" - except TypeError: - new_numpy_behavior = True - - if dtype.name == "Int64" and not new_numpy_behavior: - expected = pd.Series([True, True, False, True], index=list("ABCD")) - else: - expected = pd.Series([True, True, False, False], index=list("ABCD")) + # pre-numpy 1.20 this comparison could raise or in some cases + # come back True + assert np.dtype("int64") != "Int64" + expected = pd.Series([True, True, False, False], index=list("ABCD")) self.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index b6c6792355fc7..767912a7d2667 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -870,12 +870,7 @@ def test_constructor_dict_timedelta64_index(self, klass, name): index=[Timedelta(td, "D") for td in td_as_int], ) - if name == "Timedelta[s]": - # TODO(2.0): passing index here shouldn't be necessary, is for now - # otherwise we raise in _extract_index - result = DataFrame(data, index=expected.index) - else: - result = DataFrame(data) + result = DataFrame(data) tm.assert_frame_equal(result, expected) @@ -3139,12 +3134,6 @@ def test_frame_allow_non_nano(self, arr): df = DataFrame(arr) assert df.dtypes[0] == arr.dtype - @pytest.mark.xfail( - # TODO(2.0): xfail should become unnecessary - strict=False, - reason="stack_arrays converts TDA to ndarray, then goes " - "through ensure_wrapped_if_datetimelike", - ) def test_frame_from_dict_allow_non_nano(self, arr): df = DataFrame({0: arr}) assert df.dtypes[0] == arr.dtype diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index bc84a877cd75f..e74c008d05dbb 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -137,6 +137,9 @@ def df(self): ) return df + @pytest.mark.filterwarnings( + "ignore:The default value of numeric_only:FutureWarning" + ) @pytest.mark.parametrize("method", ["mean", "median"]) def test_averages(self, df, method): # mean / median @@ -214,6 +217,9 @@ def test_first_last(self, df, method): self._check(df, method, expected_columns, expected_columns_numeric) + @pytest.mark.filterwarnings( + "ignore:The default value of numeric_only:FutureWarning" + ) @pytest.mark.parametrize("method", ["sum", "cumsum"]) def test_sum_cumsum(self, df, method): @@ -227,6 +233,9 @@ def test_sum_cumsum(self, df, method): self._check(df, method, expected_columns, expected_columns_numeric) + @pytest.mark.filterwarnings( + "ignore:The default value of numeric_only:FutureWarning" + ) @pytest.mark.parametrize("method", ["prod", "cumprod"]) def test_prod_cumprod(self, df, method): diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index ad8051792266e..d6a613e678b6f 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -497,7 +497,8 @@ def test_transform_coercion(): # in 2.0 np.mean on a DataFrame is equivalent to frame.mean(axis=None) # which not gives a scalar instead of Series - result = g.transform(lambda x: np.mean(x)) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = g.transform(lambda x: np.mean(x)) tm.assert_frame_equal(result, expected) with tm.assert_produces_warning(None): diff --git a/pandas/tests/indexes/datetimes/methods/test_snap.py b/pandas/tests/indexes/datetimes/methods/test_snap.py index 755f6cff1278f..bceb37fdc90cf 100644 --- a/pandas/tests/indexes/datetimes/methods/test_snap.py +++ b/pandas/tests/indexes/datetimes/methods/test_snap.py @@ -8,21 +8,9 @@ def astype_non_nano(dti_nano, unit): - # TODO(2.0): remove once DTI/DTA.astype supports non-nano - if unit == "ns": - return dti_nano - - dta_nano = dti_nano._data - arr_nano = dta_nano._ndarray - - arr = arr_nano.astype(f"M8[{unit}]") - if dti_nano.tz is None: - dtype = arr.dtype - else: - dtype = type(dti_nano.dtype)(tz=dti_nano.tz, unit=unit) - dta = type(dta_nano)._simple_new(arr, dtype=dtype) + # TODO(2.0): remove once DTI supports as_unit + dta = dti_nano._data.as_unit(unit) dti = DatetimeIndex(dta, name=dti_nano.name) - assert dti.dtype == dtype return dti diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 26eb7532adfa4..11cbcfe231928 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -435,7 +435,6 @@ def test_loc_to_fail(self): np.random.random((3, 3)), index=["a", "b", "c"], columns=["e", "f", "g"] ) - # raise a KeyError? msg = ( r"\"None of \[Int64Index\(\[1, 2\], dtype='int64'\)\] are " r"in the \[index\]\"" diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 32b616cd9ab9b..535c2d3e7e0f3 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -456,19 +456,13 @@ def test_read_write_reread_dta14(self, file, parsed_114, version, datapath): parsed = self.read_dta(file) parsed.index.name = "index" - expected = self.read_csv(datapath("io", "data", "stata", "stata5.csv")) - cols = ["byte_", "int_", "long_", "float_", "double_"] - for col in cols: - expected[col] = expected[col]._convert(datetime=True, numeric=True) - expected["float_"] = expected["float_"].astype(np.float32) - expected["date_td"] = pd.to_datetime(expected["date_td"], errors="coerce") - tm.assert_frame_equal(parsed_114, parsed) with tm.ensure_clean() as path: parsed_114.to_stata(path, convert_dates={"date_td": "td"}, version=version) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), parsed_114) + + tm.assert_frame_equal(written_and_read_again.set_index("index"), parsed_114) @pytest.mark.parametrize( "file", ["stata6_113", "stata6_114", "stata6_115", "stata6_117"] diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 2796db89d60f3..d422b6db409d1 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -1,5 +1,10 @@ from __future__ import annotations +from typing import ( + TYPE_CHECKING, + cast, +) + import numpy as np from pandas._libs.algos import unique_deltas @@ -42,6 +47,13 @@ from pandas.core.algorithms import unique +if TYPE_CHECKING: + from pandas import ( + DatetimeIndex, + TimedeltaIndex, + ) + from pandas.core.arrays import TimedeltaArray + # --------------------------------------------------------------------- # Offset names ("time rules") and related functions @@ -180,18 +192,15 @@ class _FrequencyInferer: Not sure if I can avoid the state machine here """ - def __init__(self, index) -> None: + def __init__(self, index: DatetimeIndex | TimedeltaIndex | TimedeltaArray) -> None: self.index = index self.i8values = index.asi8 # For get_unit_from_dtype we need the dtype to the underlying ndarray, # which for tz-aware is not the same as index.dtype if isinstance(index, ABCIndex): - # error: Item "ndarray[Any, Any]" of "Union[ExtensionArray, - # ndarray[Any, Any]]" has no attribute "_ndarray" - self._creso = get_unit_from_dtype( - index._data._ndarray.dtype # type: ignore[union-attr] - ) + index = cast("DatetimeIndex | TimedeltaIndex", index) + self._creso = get_unit_from_dtype(index._data._ndarray.dtype) else: # otherwise we have DTA/TDA self._creso = get_unit_from_dtype(index._ndarray.dtype) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index ca856fe2482ca..33830e96342f3 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -112,20 +112,20 @@ def safe_import(mod_name: str, min_version: str | None = None): return False -def _skip_if_no_mpl() -> bool | None: +def _skip_if_no_mpl() -> bool: mod = safe_import("matplotlib") if mod: mod.use("Agg") - return None + return False else: return True -def _skip_if_not_us_locale() -> bool | None: +def _skip_if_not_us_locale() -> bool: lang, _ = locale.getlocale() if lang != "en_US": return True - return None + return False def _skip_if_no_scipy() -> bool: @@ -193,18 +193,14 @@ def skip_if_no(package: str, min_version: str | None = None): ) -# error: Argument 1 to "__call__" of "_SkipifMarkDecorator" has incompatible type -# "Optional[bool]"; expected "Union[str, bool]" skip_if_no_mpl = pytest.mark.skipif( - _skip_if_no_mpl(), reason="Missing matplotlib dependency" # type: ignore[arg-type] + _skip_if_no_mpl(), reason="Missing matplotlib dependency" ) skip_if_mpl = pytest.mark.skipif(not _skip_if_no_mpl(), reason="matplotlib is present") skip_if_32bit = pytest.mark.skipif(not IS64, reason="skipping for 32 bit") skip_if_windows = pytest.mark.skipif(is_platform_windows(), reason="Running on Windows") -# error: Argument 1 to "__call__" of "_SkipifMarkDecorator" has incompatible type -# "Optional[bool]"; expected "Union[str, bool]" skip_if_not_us_locale = pytest.mark.skipif( - _skip_if_not_us_locale(), # type: ignore[arg-type] + _skip_if_not_us_locale(), reason=f"Specific locale is set {locale.getlocale()[0]}", ) skip_if_no_scipy = pytest.mark.skipif( From d9e872936bf080148d4953f3a99d2bfe51fdbed0 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 22 Nov 2022 18:48:00 -0800 Subject: [PATCH 2/5] revert --- pandas/tseries/frequencies.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index d422b6db409d1..2796db89d60f3 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -1,10 +1,5 @@ from __future__ import annotations -from typing import ( - TYPE_CHECKING, - cast, -) - import numpy as np from pandas._libs.algos import unique_deltas @@ -47,13 +42,6 @@ from pandas.core.algorithms import unique -if TYPE_CHECKING: - from pandas import ( - DatetimeIndex, - TimedeltaIndex, - ) - from pandas.core.arrays import TimedeltaArray - # --------------------------------------------------------------------- # Offset names ("time rules") and related functions @@ -192,15 +180,18 @@ class _FrequencyInferer: Not sure if I can avoid the state machine here """ - def __init__(self, index: DatetimeIndex | TimedeltaIndex | TimedeltaArray) -> None: + def __init__(self, index) -> None: self.index = index self.i8values = index.asi8 # For get_unit_from_dtype we need the dtype to the underlying ndarray, # which for tz-aware is not the same as index.dtype if isinstance(index, ABCIndex): - index = cast("DatetimeIndex | TimedeltaIndex", index) - self._creso = get_unit_from_dtype(index._data._ndarray.dtype) + # error: Item "ndarray[Any, Any]" of "Union[ExtensionArray, + # ndarray[Any, Any]]" has no attribute "_ndarray" + self._creso = get_unit_from_dtype( + index._data._ndarray.dtype # type: ignore[union-attr] + ) else: # otherwise we have DTA/TDA self._creso = get_unit_from_dtype(index._ndarray.dtype) From 21dcd3c85103de2fccda017aa45075459107aa68 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 23 Nov 2022 08:59:05 -0800 Subject: [PATCH 3/5] mypy fixup --- pandas/core/construction.py | 1 + pandas/io/pytables.py | 1 + 2 files changed, 2 insertions(+) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 6e1524cb7df32..1a55ffecebe4c 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -485,6 +485,7 @@ def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray: mask = ma.getmaskarray(data) if mask.any(): dtype, fill_value = maybe_promote(data.dtype, np.nan) + dtype = cast(np.dtype, dtype) data = data.astype(dtype, copy=True) data.soften_mask() # set hardmask False if it was True data[mask] = fill_value diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index d1d69499fc089..8246d3a7e5c96 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4046,6 +4046,7 @@ def get_blk_items(mgr): blk_items = get_blk_items(mgr) for c in data_columns: mgr = frame.reindex([c], axis=axis)._mgr + mgr = cast(BlockManager, mgr) blocks.extend(mgr.blocks) blk_items.extend(get_blk_items(mgr)) From b200ba399e3e2e49b2c3a27adff452342ad4acd5 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 23 Nov 2022 10:39:14 -0800 Subject: [PATCH 4/5] mypy fixup --- pandas/core/construction.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 1a55ffecebe4c..8984c54e39071 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -486,7 +486,9 @@ def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray: if mask.any(): dtype, fill_value = maybe_promote(data.dtype, np.nan) dtype = cast(np.dtype, dtype) - data = data.astype(dtype, copy=True) + # Incompatible types in assignment (expression has type "ndarray[Any, + # dtype[Any]]", variable has type "MaskedArray[Any, Any]") + data = data.astype(dtype, copy=True) # type: ignore[assignment] data.soften_mask() # set hardmask False if it was True data[mask] = fill_value else: From 12ca9908515f1fc6fbaf678903c412d6f0c8de5e Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 23 Nov 2022 12:30:26 -0800 Subject: [PATCH 5/5] troubleshoot min_version build --- pandas/tests/extension/base/dtype.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index 1bb901a4d615e..2635343d73fd7 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -71,9 +71,14 @@ def test_check_dtype(self, data): ) result = df.dtypes == str(dtype) - # pre-numpy 1.20 this comparison could raise or in some cases - # come back True - assert np.dtype("int64") != "Int64" + try: + new_numpy_behavior = np.dtype("int64") != "Int64" + except TypeError: + # numpy<=1.20.3 this comparison could raise or in some cases + # come back True + new_numpy_behavior = True + assert new_numpy_behavior + expected = pd.Series([True, True, False, False], index=list("ABCD")) self.assert_series_equal(result, expected)