Skip to content

Commit ded62d8

Browse files
authored
Merge branch 'pandas-dev:main' into main
2 parents 65bf672 + faa8114 commit ded62d8

File tree

30 files changed

+69
-168
lines changed

30 files changed

+69
-168
lines changed

doc/_templates/pandas_footer.html

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<p class="copyright">
2+
&copy; {{copyright}} pandas via <a href="https://numfocus.org">NumFOCUS, Inc.</a> Hosted by <a href="https://www.ovhcloud.com">OVH Cloud</a>.
3+
</p>

doc/source/conf.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -163,11 +163,8 @@
163163

164164
# General information about the project.
165165
project = "pandas"
166-
copyright = (
167-
f"{datetime.now().year} "
168-
'pandas via <a href="https://numfocus.org">NumFOCUS, Inc.</a> '
169-
'Hosted by <a href="https://www.ovhcloud.com">OVH Cloud</a>'
170-
)
166+
# We have our custom "pandas_footer.html" template, using copyright for the current year
167+
copyright = f"{datetime.now().year}"
171168

172169
# The version info for the project you're documenting, acts as replacement for
173170
# |version| and |release|, also used in various other places throughout the
@@ -243,6 +240,7 @@
243240

244241
html_theme_options = {
245242
"external_links": [],
243+
"footer_items": ["pandas_footer", "sphinx-version"],
246244
"github_url": "https://github.com/pandas-dev/pandas",
247245
"twitter_url": "https://twitter.com/pandas_dev",
248246
"google_analytics_id": "UA-27880019-2",

doc/source/user_guide/indexing.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1723,12 +1723,13 @@ the given columns to a MultiIndex:
17231723
frame
17241724
17251725
Other options in ``set_index`` allow you to not drop the index columns or to add
1726-
the index without creating a copy of the underlying data:
1726+
the index in-place (without creating a new object):
17271727

17281728
.. ipython:: python
17291729
17301730
data.set_index('c', drop=False)
1731-
data.set_index(['a', 'b'], copy=False)
1731+
data.set_index(['a', 'b'], inplace=True)
1732+
data
17321733
17331734
Reset the index
17341735
~~~~~~~~~~~~~~~

doc/source/whatsnew/v1.5.0.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,6 @@ Other enhancements
330330
- :meth:`DataFrame.quantile` gained a ``method`` argument that can accept ``table`` to evaluate multi-column quantiles (:issue:`43881`)
331331
- :class:`Interval` now supports checking whether one interval is contained by another interval (:issue:`46613`)
332332
- Added ``copy`` keyword to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` to allow user to set axis on a new object without necessarily copying the underlying data (:issue:`47932`)
333-
- :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`)
334333
- The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`)
335334
- The ``Dockerfile`` now installs a dedicated ``pandas-dev`` virtual environment for pandas development instead of using the ``base`` environment (:issue:`48427`)
336335

@@ -934,7 +933,6 @@ Other Deprecations
934933
- Deprecated the ``inplace`` keyword in :meth:`DataFrame.set_axis` and :meth:`Series.set_axis`, use ``obj = obj.set_axis(..., copy=False)`` instead (:issue:`48130`)
935934
- Deprecated producing a single element when iterating over a :class:`DataFrameGroupBy` or a :class:`SeriesGroupBy` that has been grouped by a list of length 1; A tuple of length one will be returned instead (:issue:`42795`)
936935
- Fixed up warning message of deprecation of :meth:`MultiIndex.lexsort_depth` as public method, as the message previously referred to :meth:`MultiIndex.is_lexsorted` instead (:issue:`38701`)
937-
- Deprecated the ``inplace`` keyword in :meth:`DataFrame.set_index`, use ``df = df.set_index(..., copy=False)`` instead (:issue:`48115`)
938936
- Deprecated the ``sort_columns`` argument in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`).
939937
- Deprecated positional arguments for all but the first argument of :meth:`DataFrame.to_stata` and :func:`read_stata`, use keyword arguments instead (:issue:`48128`).
940938
- Deprecated the ``mangle_dupe_cols`` argument in :func:`read_csv`, :func:`read_fwf`, :func:`read_table` and :func:`read_excel`. The argument was never implemented, and a new argument where the renaming pattern can be specified will be added instead (:issue:`47718`)

pandas/core/frame.py

Lines changed: 4 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -5869,9 +5869,8 @@ def set_index(
58695869
*,
58705870
drop: bool = ...,
58715871
append: bool = ...,
5872-
inplace: Literal[False] | lib.NoDefault = ...,
5872+
inplace: Literal[False] = ...,
58735873
verify_integrity: bool = ...,
5874-
copy: bool | lib.NoDefault = ...,
58755874
) -> DataFrame:
58765875
...
58775876

@@ -5884,7 +5883,6 @@ def set_index(
58845883
append: bool = ...,
58855884
inplace: Literal[True],
58865885
verify_integrity: bool = ...,
5887-
copy: bool | lib.NoDefault = ...,
58885886
) -> None:
58895887
...
58905888

@@ -5894,9 +5892,8 @@ def set_index(
58945892
keys,
58955893
drop: bool = True,
58965894
append: bool = False,
5897-
inplace: bool | lib.NoDefault = lib.no_default,
5895+
inplace: bool = False,
58985896
verify_integrity: bool = False,
5899-
copy: bool | lib.NoDefault = lib.no_default,
59005897
) -> DataFrame | None:
59015898
"""
59025899
Set the DataFrame index using existing columns.
@@ -5919,18 +5916,10 @@ def set_index(
59195916
Whether to append columns to existing index.
59205917
inplace : bool, default False
59215918
Whether to modify the DataFrame rather than creating a new one.
5922-
5923-
.. deprecated:: 1.5.0
5924-
59255919
verify_integrity : bool, default False
59265920
Check the new index for duplicates. Otherwise defer the check until
59275921
necessary. Setting to False will improve the performance of this
59285922
method.
5929-
copy : bool, default True
5930-
Whether to make a copy of the underlying data when returning a new
5931-
DataFrame.
5932-
5933-
.. versionadded:: 1.5.0
59345923
59355924
Returns
59365925
-------
@@ -5995,25 +5984,7 @@ def set_index(
59955984
3 9 7 2013 84
59965985
4 16 10 2014 31
59975986
"""
5998-
if inplace is not lib.no_default:
5999-
inplace = validate_bool_kwarg(inplace, "inplace")
6000-
warnings.warn(
6001-
"The 'inplace' keyword in DataFrame.set_index is deprecated "
6002-
"and will be removed in a future version. Use "
6003-
"`df = df.set_index(..., copy=False)` instead.",
6004-
FutureWarning,
6005-
stacklevel=find_stack_level(inspect.currentframe()),
6006-
)
6007-
else:
6008-
inplace = False
6009-
6010-
if inplace:
6011-
if copy is not lib.no_default:
6012-
raise ValueError("Cannot specify copy when inplace=True")
6013-
copy = False
6014-
elif copy is lib.no_default:
6015-
copy = True
6016-
5987+
inplace = validate_bool_kwarg(inplace, "inplace")
60175988
self._check_inplace_and_allows_duplicate_labels(inplace)
60185989
if not isinstance(keys, list):
60195990
keys = [keys]
@@ -6049,7 +6020,7 @@ def set_index(
60496020
if inplace:
60506021
frame = self
60516022
else:
6052-
frame = self.copy(deep=copy)
6023+
frame = self.copy()
60536024

60546025
arrays = []
60556026
names: list[Hashable] = []

pandas/core/internals/blocks.py

Lines changed: 6 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -881,8 +881,13 @@ def take_nd(
881881
)
882882

883883
# Called from three places in managers, all of which satisfy
884-
# this assertion
884+
# these assertions
885+
if isinstance(self, ExtensionBlock):
886+
# NB: in this case, the 'axis' kwarg will be ignored in the
887+
# algos.take_nd call above.
888+
assert not (self.ndim == 1 and new_mgr_locs is None)
885889
assert not (axis == 0 and new_mgr_locs is None)
890+
886891
if new_mgr_locs is None:
887892
new_mgr_locs = self._mgr_locs
888893

@@ -1753,33 +1758,6 @@ def is_view(self) -> bool:
17531758
def is_numeric(self):
17541759
return self.values.dtype._is_numeric
17551760

1756-
def take_nd(
1757-
self,
1758-
indexer: npt.NDArray[np.intp],
1759-
axis: int = 0,
1760-
new_mgr_locs: BlockPlacement | None = None,
1761-
fill_value=lib.no_default,
1762-
) -> Block:
1763-
"""
1764-
Take values according to indexer and return them as a block.
1765-
"""
1766-
if fill_value is lib.no_default:
1767-
fill_value = None
1768-
1769-
# TODO(EA2D): special case not needed with 2D EAs
1770-
# axis doesn't matter; we are really a single-dim object
1771-
# but are passed the axis depending on the calling routing
1772-
# if its REALLY axis 0, then this will be a reindex and not a take
1773-
new_values = self.values.take(indexer, fill_value=fill_value, allow_fill=True)
1774-
1775-
# Called from three places in managers, all of which satisfy
1776-
# this assertion
1777-
assert not (self.ndim == 1 and new_mgr_locs is None)
1778-
if new_mgr_locs is None:
1779-
new_mgr_locs = self._mgr_locs
1780-
1781-
return self.make_block_same_class(new_values, new_mgr_locs)
1782-
17831761
def _slice(
17841762
self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
17851763
) -> ExtensionArray:

pandas/core/internals/managers.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,9 @@ def reindex_indexer(
726726
result.axes[axis] = new_axis
727727
return result
728728

729+
# Should be intp, but in some cases we get int64 on 32bit builds
730+
assert isinstance(indexer, np.ndarray)
731+
729732
# some axes don't allow reindexing with dups
730733
if not allow_dups:
731734
self.axes[axis]._validate_can_reindex(indexer)

pandas/core/reshape/merge.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -783,9 +783,9 @@ def get_result(self, copy: bool = True) -> DataFrame:
783783
if self.indicator:
784784
result = self._indicator_post_merge(result)
785785

786-
result = self._maybe_add_join_keys(result, left_indexer, right_indexer)
786+
self._maybe_add_join_keys(result, left_indexer, right_indexer)
787787

788-
result = self._maybe_restore_index_levels(result)
788+
self._maybe_restore_index_levels(result)
789789

790790
self._maybe_drop_cross_column(result, self._cross)
791791

@@ -852,7 +852,7 @@ def _indicator_post_merge(self, result: DataFrame) -> DataFrame:
852852
result = result.drop(labels=["_left_indicator", "_right_indicator"], axis=1)
853853
return result
854854

855-
def _maybe_restore_index_levels(self, result: DataFrame) -> DataFrame:
855+
def _maybe_restore_index_levels(self, result: DataFrame) -> None:
856856
"""
857857
Restore index levels specified as `on` parameters
858858
@@ -870,7 +870,7 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> DataFrame:
870870
871871
Returns
872872
-------
873-
DataFrame
873+
None
874874
"""
875875
names_to_restore = []
876876
for name, left_key, right_key in zip(
@@ -894,15 +894,14 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> DataFrame:
894894
names_to_restore.append(name)
895895

896896
if names_to_restore:
897-
result = result.set_index(names_to_restore, copy=False)
898-
return result
897+
result.set_index(names_to_restore, inplace=True)
899898

900899
def _maybe_add_join_keys(
901900
self,
902901
result: DataFrame,
903902
left_indexer: np.ndarray | None,
904903
right_indexer: np.ndarray | None,
905-
) -> DataFrame:
904+
) -> None:
906905

907906
left_has_missing = None
908907
right_has_missing = None
@@ -993,12 +992,11 @@ def _maybe_add_join_keys(
993992
for level_name in result.index.names
994993
]
995994

996-
result = result.set_index(idx_list, copy=False)
995+
result.set_index(idx_list, inplace=True)
997996
else:
998997
result.index = Index(key_col, name=name)
999998
else:
1000999
result.insert(i, name or f"key_{i}", key_col)
1001-
return result
10021000

10031001
def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
10041002
"""return the join indexers"""
@@ -1771,8 +1769,7 @@ def get_result(self, copy: bool = True) -> DataFrame:
17711769
result = self._reindex_and_concat(
17721770
join_index, left_join_indexer, right_join_indexer, copy=copy
17731771
)
1774-
1775-
result = self._maybe_add_join_keys(result, left_indexer, right_indexer)
1772+
self._maybe_add_join_keys(result, left_indexer, right_indexer)
17761773

17771774
return result
17781775

pandas/io/parsers/arrow_parser_wrapper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def _finalize_output(self, frame: DataFrame) -> DataFrame:
117117
# String case
118118
if item not in frame.columns:
119119
raise ValueError(f"Index {item} invalid")
120-
frame = frame.set_index(self.index_col, drop=True, copy=False)
120+
frame.set_index(self.index_col, drop=True, inplace=True)
121121
# Clear names if headerless and no name given
122122
if self.header is None and not multi_index_named:
123123
frame.index.names = [None] * len(frame.index.names)

pandas/io/pytables.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4673,7 +4673,7 @@ def read(
46734673
columns.insert(0, n)
46744674
s = super().read(where=where, columns=columns, start=start, stop=stop)
46754675
if is_multi_index:
4676-
s = s.set_index(self.levels, copy=False)
4676+
s.set_index(self.levels, inplace=True)
46774677

46784678
s = s.iloc[:, 0]
46794679

pandas/io/sql.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ def _wrap_result(
152152
frame = _parse_date_columns(frame, parse_dates)
153153

154154
if index_col is not None:
155-
frame = frame.set_index(index_col, copy=False)
155+
frame.set_index(index_col, inplace=True)
156156

157157
return frame
158158

@@ -980,7 +980,7 @@ def _query_iterator(
980980
self._harmonize_columns(parse_dates=parse_dates)
981981

982982
if self.index is not None:
983-
self.frame = self.frame.set_index(self.index, copy=False)
983+
self.frame.set_index(self.index, inplace=True)
984984

985985
yield self.frame
986986

@@ -1021,7 +1021,7 @@ def read(
10211021
self._harmonize_columns(parse_dates=parse_dates)
10221022

10231023
if self.index is not None:
1024-
self.frame = self.frame.set_index(self.index, copy=False)
1024+
self.frame.set_index(self.index, inplace=True)
10251025

10261026
return self.frame
10271027

pandas/tests/extension/base/dim2.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818

1919

2020
class Dim2CompatTests(BaseExtensionTests):
21+
# Note: these are ONLY for ExtensionArray subclasses that support 2D arrays.
22+
# i.e. not for pyarrow-backed EAs.
23+
2124
def test_transpose(self, data):
2225
arr2d = data.repeat(2).reshape(-1, 2)
2326
shape = arr2d.shape

pandas/tests/extension/test_arrow.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -319,20 +319,6 @@ def test_from_sequence_of_strings_pa_array(self, data, request):
319319
tm.assert_extension_array_equal(result, data)
320320

321321

322-
@pytest.mark.xfail(
323-
raises=NotImplementedError, reason="pyarrow.ChunkedArray backing is 1D."
324-
)
325-
class TestDim2Compat(base.Dim2CompatTests):
326-
pass
327-
328-
329-
@pytest.mark.xfail(
330-
raises=NotImplementedError, reason="pyarrow.ChunkedArray backing is 1D."
331-
)
332-
class TestNDArrayBacked2D(base.NDArrayBacked2DTests):
333-
pass
334-
335-
336322
class TestGetitemTests(base.BaseGetitemTests):
337323
@pytest.mark.xfail(
338324
reason=(

pandas/tests/frame/methods/test_combine_first.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -394,12 +394,12 @@ def test_combine_first_string_dtype_only_na(self, nullable_string_dtype):
394394
PerformanceWarning,
395395
pa_version_under7p0 and nullable_string_dtype == "string[pyarrow]",
396396
):
397-
df = df.set_index(["a", "b"], copy=False)
397+
df.set_index(["a", "b"], inplace=True)
398398
with tm.maybe_produces_warning(
399399
PerformanceWarning,
400400
pa_version_under7p0 and nullable_string_dtype == "string[pyarrow]",
401401
):
402-
df2 = df2.set_index(["a", "b"], copy=False)
402+
df2.set_index(["a", "b"], inplace=True)
403403
result = df.combine_first(df2)
404404
with tm.maybe_produces_warning(
405405
PerformanceWarning,

0 commit comments

Comments
 (0)