Skip to content

API: Avoid returning same object for various methods #51032

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,7 @@ Other API changes
- Loading a JSON file with duplicate columns using ``read_json(orient='split')`` renames columns to avoid duplicates, as :func:`read_csv` and the other readers do (:issue:`50370`)
- The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. Previously they had dtype ``int64`` (:issue:`50926`)
- :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`)
-
- The methods :meth:`Series.round`, :meth:`DataFrame.__invert__`, :meth:`Series.__invert__`, :meth:`DataFrame.swapaxes`, :meth:`DataFrame.first`, :meth:`DataFrame.last`, :meth:`Series.first`, :meth:`Series.last` and :meth:`DataFrame.align` will now always return new objects (:issue:`51032`)

.. ---------------------------------------------------------------------------
.. _whatsnew_200.deprecations:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9929,7 +9929,7 @@ def _series_round(ser: Series, decimals: int):
concat(new_cols, axis=1), index=self.index, columns=self.columns
).__finalize__(self, method="round")
else:
return self
return self.copy(deep=False)

# ----------------------------------------------------------------------
# Statistical methods, etc.
Expand Down
20 changes: 5 additions & 15 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,8 +779,6 @@ def swapaxes(
j = self._get_axis_number(axis2)

if i == j:
if copy is False and not using_copy_on_write():
return self
return self.copy(deep=copy)

mapping = {i: j, j: i}
Expand Down Expand Up @@ -1486,7 +1484,7 @@ def blk_func(values: ArrayLike):
def __invert__(self: NDFrameT) -> NDFrameT:
if not self.size:
# inv fails with 0 len
return self
return self.copy(deep=False)

new_data = self._mgr.apply(operator.invert)
return self._constructor(new_data).__finalize__(self, method="__invert__")
Expand Down Expand Up @@ -8890,7 +8888,7 @@ def first(self: NDFrameT, offset) -> NDFrameT:
raise TypeError("'first' only supports a DatetimeIndex index")

if len(self.index) == 0:
return self
return self.copy(deep=False)

offset = to_offset(offset)
if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]):
Expand Down Expand Up @@ -8963,7 +8961,7 @@ def last(self: NDFrameT, offset) -> NDFrameT:
raise TypeError("'last' only supports a DatetimeIndex index")

if len(self.index) == 0:
return self
return self.copy(deep=False)

offset = to_offset(offset)

Expand Down Expand Up @@ -9471,8 +9469,6 @@ def _align_series(
limit=None,
fill_axis: Axis = 0,
):
uses_cow = using_copy_on_write()

is_series = isinstance(self, ABCSeries)

if (not is_series and axis is None) or axis not in [None, 0, 1]:
Expand All @@ -9495,10 +9491,7 @@ def _align_series(
if is_series:
left = self._reindex_indexer(join_index, lidx, copy)
elif lidx is None or join_index is None:
if uses_cow:
left = self.copy(deep=copy)
else:
left = self.copy(deep=copy) if copy or copy is None else self
left = self.copy(deep=copy)
else:
left = self._constructor(
self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy)
Expand Down Expand Up @@ -9527,10 +9520,7 @@ def _align_series(
left = self._constructor(fdata)

if ridx is None:
if uses_cow:
right = other.copy(deep=copy)
else:
right = other.copy(deep=copy) if copy or copy is None else other
right = other.copy(deep=copy)
else:
right = other.reindex(join_index, level=level)

Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/frame/methods/test_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,3 +414,23 @@ def test_align_series_check_copy(self):
result, other = df.align(ser, axis=1)
ser.iloc[0] = 100
tm.assert_series_equal(other, expected)

def test_align_identical_different_object(self):
# GH#51032
df = DataFrame({"a": [1, 2]})
ser = Series([3, 4])
result, result2 = df.align(ser, axis=0)
tm.assert_frame_equal(result, df)
tm.assert_series_equal(result2, ser)
assert df is not result
assert ser is not result2

def test_align_identical_different_object_columns(self):
# GH#51032
df = DataFrame({"a": [1, 2]})
ser = Series([1], index=["a"])
result, result2 = df.align(ser, axis=1)
tm.assert_frame_equal(result, df)
tm.assert_series_equal(result2, ser)
assert df is not result
assert ser is not result2
9 changes: 9 additions & 0 deletions pandas/tests/frame/methods/test_first_and_last.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
import pytest

import pandas as pd
from pandas import (
DataFrame,
bdate_range,
Expand Down Expand Up @@ -86,3 +87,11 @@ def test_first_with_first_day_end_of_frq_n_greater_one(self, frame_or_series):
[1] * 23, index=bdate_range("2010-03-31", "2010-04-30")
)
tm.assert_equal(result, expected)

@pytest.mark.parametrize("func", ["first", "last"])
def test_empty_not_input(self, func):
    # GH#51032
    # Call ``first`` / ``last`` on a frame with an empty DatetimeIndex and
    # verify the result equals the input but is a different object.
    empty_frame = DataFrame(index=pd.DatetimeIndex([]))
    bound_method = getattr(empty_frame, func)
    outcome = bound_method(offset=1)
    tm.assert_frame_equal(empty_frame, outcome)
    assert outcome is not empty_frame
7 changes: 7 additions & 0 deletions pandas/tests/frame/methods/test_round.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,10 @@ def test_round_interval_category_columns(self):
result = df.round()
expected = DataFrame([[1.0, 1.0], [0.0, 0.0]], columns=columns)
tm.assert_frame_equal(result, expected)

def test_round_empty_not_input(self):
# GH#51032
df = DataFrame()
result = df.round()
tm.assert_frame_equal(df, result)
assert df is not result
7 changes: 7 additions & 0 deletions pandas/tests/frame/methods/test_swapaxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,10 @@ def test_swapaxes_invalid_axis(self):
msg = "No axis named 2 for object type DataFrame"
with pytest.raises(ValueError, match=msg):
df.swapaxes(2, 5)

def test_round_empty_not_input(self):
# GH#51032
df = DataFrame({"a": [1, 2]})
result = df.swapaxes("index", "index")
tm.assert_frame_equal(df, result)
assert df is not result
7 changes: 7 additions & 0 deletions pandas/tests/frame/test_unary.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,13 @@ def test_invert_mixed(self):
)
tm.assert_frame_equal(result, expected)

def test_invert_empy_not_input(self):
# GH#51032
df = pd.DataFrame()
result = ~df
tm.assert_frame_equal(df, result)
assert df is not result

@pytest.mark.parametrize(
"df",
[
Expand Down