diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 843d530417e1e..7c905976c0191 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -624,7 +624,7 @@ Other API changes - Loading a JSON file with duplicate columns using ``read_json(orient='split')`` renames columns to avoid duplicates, as :func:`read_csv` and the other readers do (:issue:`50370`) - The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. Previously they had dtype ``int64`` (:issue:`50926`) - :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`) -- +- The methods :meth:`DataFrame.round`, :meth:`DataFrame.__invert__`, :meth:`Series.__invert__`, :meth:`DataFrame.swapaxes`, :meth:`DataFrame.first`, :meth:`DataFrame.last`, :meth:`Series.first`, :meth:`Series.last` and :meth:`DataFrame.align` will now always return new objects (:issue:`51032`) .. --------------------------------------------------------------------------- .. _whatsnew_200.deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1b7dc8b09e6ad..4b3c21239fd5b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9929,7 +9929,7 @@ def _series_round(ser: Series, decimals: int): concat(new_cols, axis=1), index=self.index, columns=self.columns ).__finalize__(self, method="round") else: - return self + return self.copy(deep=False) # ---------------------------------------------------------------------- # Statistical methods, etc. 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ce123c704ba33..0998f00b4adbb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -779,8 +779,6 @@ def swapaxes( j = self._get_axis_number(axis2) if i == j: - if copy is False and not using_copy_on_write(): - return self return self.copy(deep=copy) mapping = {i: j, j: i} @@ -1486,7 +1484,7 @@ def blk_func(values: ArrayLike): def __invert__(self: NDFrameT) -> NDFrameT: if not self.size: # inv fails with 0 len - return self + return self.copy(deep=False) new_data = self._mgr.apply(operator.invert) return self._constructor(new_data).__finalize__(self, method="__invert__") @@ -8890,7 +8888,7 @@ def first(self: NDFrameT, offset) -> NDFrameT: raise TypeError("'first' only supports a DatetimeIndex index") if len(self.index) == 0: - return self + return self.copy(deep=False) offset = to_offset(offset) if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): @@ -8963,7 +8961,7 @@ def last(self: NDFrameT, offset) -> NDFrameT: raise TypeError("'last' only supports a DatetimeIndex index") if len(self.index) == 0: - return self + return self.copy(deep=False) offset = to_offset(offset) @@ -9471,8 +9469,6 @@ def _align_series( limit=None, fill_axis: Axis = 0, ): - uses_cow = using_copy_on_write() - is_series = isinstance(self, ABCSeries) if (not is_series and axis is None) or axis not in [None, 0, 1]: @@ -9495,10 +9491,7 @@ def _align_series( if is_series: left = self._reindex_indexer(join_index, lidx, copy) elif lidx is None or join_index is None: - if uses_cow: - left = self.copy(deep=copy) - else: - left = self.copy(deep=copy) if copy or copy is None else self + left = self.copy(deep=copy) else: left = self._constructor( self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) @@ -9527,10 +9520,7 @@ def _align_series( left = self._constructor(fdata) if ridx is None: - if uses_cow: - right = other.copy(deep=copy) - else: - right = other.copy(deep=copy) if copy or copy is None 
else other + right = other.copy(deep=copy) else: right = other.reindex(join_index, level=level) diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index d4883fd854e07..65aadc9de4f23 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -414,3 +414,23 @@ def test_align_series_check_copy(self): result, other = df.align(ser, axis=1) ser.iloc[0] = 100 tm.assert_series_equal(other, expected) + + def test_align_identical_different_object(self): + # GH#51032 + df = DataFrame({"a": [1, 2]}) + ser = Series([3, 4]) + result, result2 = df.align(ser, axis=0) + tm.assert_frame_equal(result, df) + tm.assert_series_equal(result2, ser) + assert df is not result + assert ser is not result2 + + def test_align_identical_different_object_columns(self): + # GH#51032 + df = DataFrame({"a": [1, 2]}) + ser = Series([1], index=["a"]) + result, result2 = df.align(ser, axis=1) + tm.assert_frame_equal(result, df) + tm.assert_series_equal(result2, ser) + assert df is not result + assert ser is not result2 diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index 6b11211526960..64f6665ecd709 100644 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -3,6 +3,7 @@ """ import pytest +import pandas as pd from pandas import ( DataFrame, bdate_range, @@ -86,3 +87,11 @@ def test_first_with_first_day_end_of_frq_n_greater_one(self, frame_or_series): [1] * 23, index=bdate_range("2010-03-31", "2010-04-30") ) tm.assert_equal(result, expected) + + @pytest.mark.parametrize("func", ["first", "last"]) + def test_empty_not_input(self, func): + # GH#51032 + df = DataFrame(index=pd.DatetimeIndex([])) + result = getattr(df, func)(offset=1) + tm.assert_frame_equal(df, result) + assert df is not result diff --git a/pandas/tests/frame/methods/test_round.py b/pandas/tests/frame/methods/test_round.py 
index dd9206940bcd6..5579df41c1912 100644 --- a/pandas/tests/frame/methods/test_round.py +++ b/pandas/tests/frame/methods/test_round.py @@ -216,3 +216,10 @@ def test_round_interval_category_columns(self): result = df.round() expected = DataFrame([[1.0, 1.0], [0.0, 0.0]], columns=columns) tm.assert_frame_equal(result, expected) + + def test_round_empty_not_input(self): + # GH#51032 + df = DataFrame() + result = df.round() + tm.assert_frame_equal(df, result) + assert df is not result diff --git a/pandas/tests/frame/methods/test_swapaxes.py b/pandas/tests/frame/methods/test_swapaxes.py index 306f7b2b21cda..5da2c2292f137 100644 --- a/pandas/tests/frame/methods/test_swapaxes.py +++ b/pandas/tests/frame/methods/test_swapaxes.py @@ -20,3 +20,10 @@ def test_swapaxes_invalid_axis(self): msg = "No axis named 2 for object type DataFrame" with pytest.raises(ValueError, match=msg): df.swapaxes(2, 5) + + def test_swapaxes_same_axis_not_input(self): + # GH#51032 + df = DataFrame({"a": [1, 2]}) + result = df.swapaxes("index", "index") + tm.assert_frame_equal(df, result) + assert df is not result diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py index 9d38f621c0505..a9ec726ab443e 100644 --- a/pandas/tests/frame/test_unary.py +++ b/pandas/tests/frame/test_unary.py @@ -84,6 +84,13 @@ def test_invert_mixed(self): ) tm.assert_frame_equal(result, expected) + def test_invert_empty_not_input(self): + # GH#51032 + df = pd.DataFrame() + result = ~df + tm.assert_frame_equal(df, result) + assert df is not result + @pytest.mark.parametrize( "df", [