From 13876bed6db8cb707184b3d96ac4a59178f3ab2b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 27 Jan 2023 18:58:39 -0500 Subject: [PATCH 1/3] CLN: Avoid returning same object for various methods --- pandas/core/frame.py | 2 +- pandas/core/generic.py | 20 +++++-------------- .../frame/methods/test_first_and_last.py | 9 +++++++++ pandas/tests/frame/methods/test_reindex.py | 20 +++++++++++++++++++ pandas/tests/frame/methods/test_round.py | 7 +++++++ pandas/tests/frame/methods/test_swapaxes.py | 7 +++++++ pandas/tests/frame/test_unary.py | 7 +++++++ 7 files changed, 56 insertions(+), 16 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1d86c81745a6a..33317f39d0f49 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9919,7 +9919,7 @@ def _series_round(ser: Series, decimals: int): concat(new_cols, axis=1), index=self.index, columns=self.columns ).__finalize__(self, method="round") else: - return self + return self.copy(deep=False) # ---------------------------------------------------------------------- # Statistical methods, etc. 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ce123c704ba33..f6b31f8503484 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -779,8 +779,6 @@ def swapaxes( j = self._get_axis_number(axis2) if i == j: - if copy is False and not using_copy_on_write(): - return self return self.copy(deep=copy) mapping = {i: j, j: i} @@ -1486,7 +1484,7 @@ def blk_func(values: ArrayLike): def __invert__(self: NDFrameT) -> NDFrameT: if not self.size: # inv fails with 0 len - return self + return self.copy(deep=False) new_data = self._mgr.apply(operator.invert) return self._constructor(new_data).__finalize__(self, method="__invert__") @@ -8890,7 +8888,7 @@ def first(self: NDFrameT, offset) -> NDFrameT: raise TypeError("'first' only supports a DatetimeIndex index") if len(self.index) == 0: - return self + return self.copy(deep=False) offset = to_offset(offset) if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): @@ -8963,7 +8961,7 @@ def last(self: NDFrameT, offset) -> NDFrameT: raise TypeError("'last' only supports a DatetimeIndex index") if len(self.index) == 0: - return self + return self.copy(deep=None) offset = to_offset(offset) @@ -9471,8 +9469,6 @@ def _align_series( limit=None, fill_axis: Axis = 0, ): - uses_cow = using_copy_on_write() - is_series = isinstance(self, ABCSeries) if (not is_series and axis is None) or axis not in [None, 0, 1]: @@ -9495,10 +9491,7 @@ def _align_series( if is_series: left = self._reindex_indexer(join_index, lidx, copy) elif lidx is None or join_index is None: - if uses_cow: - left = self.copy(deep=copy) - else: - left = self.copy(deep=copy) if copy or copy is None else self + left = self.copy(deep=copy) else: left = self._constructor( self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) @@ -9527,10 +9520,7 @@ def _align_series( left = self._constructor(fdata) if ridx is None: - if uses_cow: - right = other.copy(deep=copy) - else: - right = other.copy(deep=copy) if copy or copy is None 
else other + right = other.copy(deep=copy) else: right = other.reindex(join_index, level=level) diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index 6b11211526960..f10b57baa7c50 100644 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -3,6 +3,7 @@ """ import pytest +import pandas as pd from pandas import ( DataFrame, bdate_range, @@ -86,3 +87,11 @@ def test_first_with_first_day_end_of_frq_n_greater_one(self, frame_or_series): [1] * 23, index=bdate_range("2010-03-31", "2010-04-30") ) tm.assert_equal(result, expected) + + @pytest.mark.parametrize("func", ["first", "last"]) + def test_empty_not_input(self, func): + # GH# + df = DataFrame(index=pd.DatetimeIndex([])) + result = getattr(df, func)(offset=1) + tm.assert_frame_equal(df, result) + assert df is not result diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index a627d0fbb4c7a..b065838ebeac3 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -1236,3 +1236,23 @@ def test_reindex_not_category(self, index_df, index_res, index_exp): result = df.reindex(index=index_res) expected = DataFrame(index=index_exp) tm.assert_frame_equal(result, expected) + + def test_reindex_identical_different_object(self): + # GH# + df = DataFrame({"a": [1, 2]}) + ser = Series([3, 4]) + result, result2 = df.align(ser, axis=0) + tm.assert_frame_equal(result, df) + tm.assert_series_equal(result2, ser) + assert df is not result + assert ser is not result2 + + def test_reindex_identical_different_object_columns(self): + # GH# + df = DataFrame({"a": [1, 2]}) + ser = Series([1], index=["a"]) + result, result2 = df.align(ser, axis=1) + tm.assert_frame_equal(result, df) + tm.assert_series_equal(result2, ser) + assert df is not result + assert ser is not result2 diff --git 
a/pandas/tests/frame/methods/test_round.py b/pandas/tests/frame/methods/test_round.py index dd9206940bcd6..1d7165d53b4df 100644 --- a/pandas/tests/frame/methods/test_round.py +++ b/pandas/tests/frame/methods/test_round.py @@ -216,3 +216,10 @@ def test_round_interval_category_columns(self): result = df.round() expected = DataFrame([[1.0, 1.0], [0.0, 0.0]], columns=columns) tm.assert_frame_equal(result, expected) + + def test_round_empty_not_input(self): + # GH# + df = DataFrame() + result = df.round() + tm.assert_frame_equal(df, result) + assert df is not result diff --git a/pandas/tests/frame/methods/test_swapaxes.py b/pandas/tests/frame/methods/test_swapaxes.py index 306f7b2b21cda..f177c68b4e0d5 100644 --- a/pandas/tests/frame/methods/test_swapaxes.py +++ b/pandas/tests/frame/methods/test_swapaxes.py @@ -20,3 +20,10 @@ def test_swapaxes_invalid_axis(self): msg = "No axis named 2 for object type DataFrame" with pytest.raises(ValueError, match=msg): df.swapaxes(2, 5) + + def test_round_empty_not_input(self): + # GH# + df = DataFrame({"a": [1, 2]}) + result = df.swapaxes("index", "index") + tm.assert_frame_equal(df, result) + assert df is not result diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py index 9d38f621c0505..0b8997fc088d7 100644 --- a/pandas/tests/frame/test_unary.py +++ b/pandas/tests/frame/test_unary.py @@ -84,6 +84,13 @@ def test_invert_mixed(self): ) tm.assert_frame_equal(result, expected) + def test_invert_empy_not_input(self): + # GH# + df = pd.DataFrame() + result = ~df + tm.assert_frame_equal(df, result) + assert df is not result + @pytest.mark.parametrize( "df", [ From 617c66e258efa8c98d8e830d01680b017a5f30b7 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 27 Jan 2023 19:00:15 -0500 Subject: [PATCH 2/3] Add gh ref --- pandas/core/generic.py | 2 +- pandas/tests/frame/methods/test_first_and_last.py | 2 +- pandas/tests/frame/methods/test_reindex.py | 4 ++-- pandas/tests/frame/methods/test_round.py | 2 +- 
pandas/tests/frame/methods/test_swapaxes.py | 2 +- pandas/tests/frame/test_unary.py | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f6b31f8503484..0998f00b4adbb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8961,7 +8961,7 @@ def last(self: NDFrameT, offset) -> NDFrameT: raise TypeError("'last' only supports a DatetimeIndex index") if len(self.index) == 0: - return self.copy(deep=None) + return self.copy(deep=False) offset = to_offset(offset) diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index f10b57baa7c50..64f6665ecd709 100644 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -90,7 +90,7 @@ def test_first_with_first_day_end_of_frq_n_greater_one(self, frame_or_series): @pytest.mark.parametrize("func", ["first", "last"]) def test_empty_not_input(self, func): - # GH# + # GH#51032 df = DataFrame(index=pd.DatetimeIndex([])) result = getattr(df, func)(offset=1) tm.assert_frame_equal(df, result) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index b065838ebeac3..101e12dd792dd 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -1238,7 +1238,7 @@ def test_reindex_not_category(self, index_df, index_res, index_exp): tm.assert_frame_equal(result, expected) def test_reindex_identical_different_object(self): - # GH# + # GH#51032 df = DataFrame({"a": [1, 2]}) ser = Series([3, 4]) result, result2 = df.align(ser, axis=0) @@ -1248,7 +1248,7 @@ def test_reindex_identical_different_object(self): assert ser is not result2 def test_reindex_identical_different_object_columns(self): - # GH# + # GH#51032 df = DataFrame({"a": [1, 2]}) ser = Series([1], index=["a"]) result, result2 = df.align(ser, axis=1) diff --git 
a/pandas/tests/frame/methods/test_round.py b/pandas/tests/frame/methods/test_round.py index 1d7165d53b4df..5579df41c1912 100644 --- a/pandas/tests/frame/methods/test_round.py +++ b/pandas/tests/frame/methods/test_round.py @@ -218,7 +218,7 @@ def test_round_interval_category_columns(self): tm.assert_frame_equal(result, expected) def test_round_empty_not_input(self): - # GH# + # GH#51032 df = DataFrame() result = df.round() tm.assert_frame_equal(df, result) diff --git a/pandas/tests/frame/methods/test_swapaxes.py b/pandas/tests/frame/methods/test_swapaxes.py index f177c68b4e0d5..5da2c2292f137 100644 --- a/pandas/tests/frame/methods/test_swapaxes.py +++ b/pandas/tests/frame/methods/test_swapaxes.py @@ -22,7 +22,7 @@ def test_swapaxes_invalid_axis(self): df.swapaxes(2, 5) def test_round_empty_not_input(self): - # GH# + # GH#51032 df = DataFrame({"a": [1, 2]}) result = df.swapaxes("index", "index") tm.assert_frame_equal(df, result) diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py index 0b8997fc088d7..a9ec726ab443e 100644 --- a/pandas/tests/frame/test_unary.py +++ b/pandas/tests/frame/test_unary.py @@ -85,7 +85,7 @@ def test_invert_mixed(self): tm.assert_frame_equal(result, expected) def test_invert_empy_not_input(self): - # GH# + # GH#51032 df = pd.DataFrame() result = ~df tm.assert_frame_equal(df, result) From a270c4301a2755789b0d1e440cf1f9d94efc034d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 30 Jan 2023 21:10:45 +0100 Subject: [PATCH 3/3] Update whatsnew and move test --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/tests/frame/methods/test_align.py | 20 ++++++++++++++++++++ pandas/tests/frame/methods/test_reindex.py | 20 -------------------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index c1d9b2744b27e..24dfdcaa6792a 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -622,7 +622,7 @@ Other API changes - 
Loading a JSON file with duplicate columns using ``read_json(orient='split')`` renames columns to avoid duplicates, as :func:`read_csv` and the other readers do (:issue:`50370`) - The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. Previously they had dtype ``int64`` (:issue:`50926`) - :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`) -- +- The methods :meth:`DataFrame.round`, :meth:`DataFrame.__invert__`, :meth:`Series.__invert__`, :meth:`DataFrame.swapaxes`, :meth:`DataFrame.first`, :meth:`DataFrame.last`, :meth:`Series.first`, :meth:`Series.last` and :meth:`DataFrame.align` will now always return new objects (:issue:`51032`) .. --------------------------------------------------------------------------- .. _whatsnew_200.deprecations: diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index d4883fd854e07..65aadc9de4f23 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -414,3 +414,23 @@ def test_align_series_check_copy(self): result, other = df.align(ser, axis=1) ser.iloc[0] = 100 tm.assert_series_equal(other, expected) + + def test_align_identical_different_object(self): + # GH#51032 + df = DataFrame({"a": [1, 2]}) + ser = Series([3, 4]) + result, result2 = df.align(ser, axis=0) + tm.assert_frame_equal(result, df) + tm.assert_series_equal(result2, ser) + assert df is not result + assert ser is not result2 + + def test_align_identical_different_object_columns(self): + # GH#51032 + df = DataFrame({"a": [1, 2]}) + ser = Series([1], index=["a"]) + result, result2 = df.align(ser, axis=1) + tm.assert_frame_equal(result, df) + tm.assert_series_equal(result2, ser) + assert df is not result + assert ser is not result2 diff --git a/pandas/tests/frame/methods/test_reindex.py
b/pandas/tests/frame/methods/test_reindex.py index 101e12dd792dd..a627d0fbb4c7a 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -1236,23 +1236,3 @@ def test_reindex_not_category(self, index_df, index_res, index_exp): result = df.reindex(index=index_res) expected = DataFrame(index=index_exp) tm.assert_frame_equal(result, expected) - - def test_reindex_identical_different_object(self): - # GH#51032 - df = DataFrame({"a": [1, 2]}) - ser = Series([3, 4]) - result, result2 = df.align(ser, axis=0) - tm.assert_frame_equal(result, df) - tm.assert_series_equal(result2, ser) - assert df is not result - assert ser is not result2 - - def test_reindex_identical_different_object_columns(self): - # GH#51032 - df = DataFrame({"a": [1, 2]}) - ser = Series([1], index=["a"]) - result, result2 = df.align(ser, axis=1) - tm.assert_frame_equal(result, df) - tm.assert_series_equal(result2, ser) - assert df is not result - assert ser is not result2