From 593e400a54d36e317a29da0fa27269ba65c73ca2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 18 Jan 2023 16:58:59 -0800 Subject: [PATCH 1/9] Use newer positional only syntax --- pandas/conftest.py | 2 - pandas/core/frame.py | 61 +++++++++----- pandas/core/series.py | 32 +++++--- pandas/tests/frame/methods/test_reindex.py | 9 ++- pandas/tests/series/methods/test_reindex.py | 7 +- pandas/util/_validators.py | 89 --------------------- 6 files changed, 70 insertions(+), 130 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 1a410f87c8552..341023d37b79d 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -149,8 +149,6 @@ def pytest_collection_modifyitems(items, config) -> None: ignored_doctest_warnings = [ # Docstring divides by zero to show behavior difference ("missing.mask_zero_div_zero", "divide by zero encountered"), - # Docstring demonstrates the call raises a warning - ("_validators.validate_axis_style_args", "Use named arguments"), ] for item in items: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index eb0eb34dbefc4..8190db7e696e0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -101,12 +101,10 @@ Appender, Substitution, doc, - rewrite_axis_style_signature, ) from pandas.util._exceptions import find_stack_level from pandas.util._validators import ( validate_ascending, - validate_axis_style_args, validate_bool_kwarg, validate_percentile, ) @@ -4992,24 +4990,47 @@ def set_axis( @Substitution(**_shared_doc_kwargs) @Appender(NDFrame.reindex.__doc__) - @rewrite_axis_style_signature( - "labels", - [ - ("method", None), - ("copy", None), - ("level", None), - ("fill_value", np.nan), - ("limit", None), - ("tolerance", None), - ], - ) - def reindex(self, *args, **kwargs) -> DataFrame: - axes = validate_axis_style_args(self, args, kwargs, "labels", "reindex") - kwargs.update(axes) - # Pop these, since the values are in `kwargs` under different names - kwargs.pop("axis", None) - kwargs.pop("labels", None) - return super().reindex(**kwargs) + def reindex( + self, + labels=None, + *, + index=None, + columns=None, + axis: Axis | None = None, + method: str | None = None, + copy: bool | None = None, + level: Level | None = None, + fill_value: Scalar = np.nan, + limit: int | None = None, + tolerance=None, + ) -> DataFrame: + if index is not None and columns is not None and labels is not None: + raise TypeError("Cannot specify all of 'labels', 'index', 'columns'.") + elif index is not None or columns is not None: + if axis is not None: + raise TypeError( + "Cannot specify both 'axis' and any of 'index' or 'columns'" + ) + if labels is not None: + if index is not None: + columns = labels + else: + index = labels + else: + if axis and self._get_axis_number(axis) == 1: + columns = labels + else: + index = labels + return super().reindex( + index=index, + columns=columns, + method=method, + copy=copy, + level=level, + fill_value=fill_value, + limit=limit, + tolerance=tolerance, + ) @overload def drop( diff --git a/pandas/core/series.py b/pandas/core/series.py index 91f7095e59db5..b042f83fcd1b3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -58,6 +58,7 @@ NaPosition, QuantileInterpolation, Renamer, + Scalar, SingleManager, SortKind, StorageOptions, @@ -4849,17 +4850,26 @@ def set_axis( optional_labels=_shared_doc_kwargs["optional_labels"], optional_axis=_shared_doc_kwargs["optional_axis"], ) - def reindex(self, *args, **kwargs) -> Series: - if len(args) > 1: - raise TypeError("Only one positional argument ('index') is allowed") - if args: - (index,) = args - if "index" in kwargs: - raise TypeError( - "'index' passed as both positional and keyword argument" - ) - kwargs.update({"index": index}) - return super().reindex(**kwargs) + def reindex( + self, + index=None, + *, + method: str | None = None, + copy: bool | None = None, + level: Level | None = None, + fill_value: Scalar = np.nan, + limit: int | None = None, + tolerance=None, + ) -> Series: + return super().reindex( + index=index, + method=method, + copy=copy, + level=level, + fill_value=fill_value, + limit=limit, + tolerance=tolerance, + ) @overload def drop( diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index a627d0fbb4c7a..f455213bd436b 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -841,17 +841,18 @@ def test_reindex_positional_raises(self): # https://github.com/pandas-dev/pandas/issues/12392 # Enforced in 2.0 df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - with pytest.raises(TypeError, match=r".* is ambiguous."): + msg = r"reindex\(\) takes from 1 to 2 positional arguments but 3 were given" + with pytest.raises(TypeError, match=msg): df.reindex([0, 1], ["A", "B", "C"]) def test_reindex_axis_style_raises(self): # https://github.com/pandas-dev/pandas/issues/12392 df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) with pytest.raises(TypeError, match="Cannot specify both 'axis'"): - df.reindex([0, 1], ["A"], axis=1) + df.reindex([0, 1], columns=["A"], axis=1) with pytest.raises(TypeError, match="Cannot specify both 'axis'"): - df.reindex([0, 1], ["A"], axis="index") + df.reindex([0, 1], columns=["A"], axis="index") with pytest.raises(TypeError, match="Cannot specify both 'axis'"): df.reindex(index=[0, 1], axis="index") @@ -866,7 +867,7 @@ def test_reindex_axis_style_raises(self): df.reindex(index=[0, 1], columns=[0, 1], axis="columns") with pytest.raises(TypeError, match="Cannot specify all"): - df.reindex([0, 1], [0], ["A"]) + df.reindex(labels=[0, 1], index=[0], columns=["A"]) # Mixing styles with pytest.raises(TypeError, match="Cannot specify both 'axis'"): diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index b00858d2779bc..2c427399c9cd5 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -369,16 +369,15 @@ def test_reindex_periodindex_with_object(p_values, o_values, values, expected_va def test_reindex_too_many_args(): # GH 40980 ser = Series([1, 2]) - with pytest.raises( - TypeError, match=r"Only one positional argument \('index'\) is allowed" - ): + msg = r"reindex\(\) takes from 1 to 2 positional arguments but 3 were given" + with pytest.raises(TypeError, match=msg): ser.reindex([2, 3], False) def test_reindex_double_index(): # GH 40980 ser = Series([1, 2]) - msg = r"'index' passed as both positional and keyword argument" + msg = r"reindex\(\) got multiple values for argument 'index'" with pytest.raises(TypeError, match=msg): ser.reindex([2, 3], index=[3, 4]) diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 943d0ef3c1332..b60169f8364da 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -5,7 +5,6 @@ from __future__ import annotations from typing import ( - Any, Iterable, Sequence, TypeVar, @@ -262,94 +261,6 @@ def validate_bool_kwarg( return value -def validate_axis_style_args( - data, args, kwargs, arg_name, method_name -) -> dict[str, Any]: - """ - Argument handler for mixed index, columns / axis functions - - In an attempt to handle both `.method(index, columns)`, and - `.method(arg, axis=.)`, we have to do some bad things to argument - parsing. This translates all arguments to `{index=., columns=.}` style. - - Parameters - ---------- - data : DataFrame - args : tuple - All positional arguments from the user - kwargs : dict - All keyword arguments from the user - arg_name, method_name : str - Used for better error messages - - Returns - ------- - kwargs : dict - A dictionary of keyword arguments. Doesn't modify ``kwargs`` - inplace, so update them with the return value here. - - Examples - -------- - >>> df = pd.DataFrame(range(2)) - >>> validate_axis_style_args(df, (str.upper,), {'columns': id}, - ... 'mapper', 'rename') - {'columns': , 'index': } - """ - # TODO: Change to keyword-only args and remove all this - - out = {} - # Goal: fill 'out' with index/columns-style arguments - # like out = {'index': foo, 'columns': bar} - - # Start by validating for consistency - if "axis" in kwargs and any(x in kwargs for x in data._AXIS_TO_AXIS_NUMBER): - msg = "Cannot specify both 'axis' and any of 'index' or 'columns'." - raise TypeError(msg) - - # First fill with explicit values provided by the user... - if arg_name in kwargs: - if args: - msg = f"{method_name} got multiple values for argument '{arg_name}'" - raise TypeError(msg) - - axis = data._get_axis_name(kwargs.get("axis", 0)) - out[axis] = kwargs[arg_name] - - # More user-provided arguments, now from kwargs - for k, v in kwargs.items(): - try: - ax = data._get_axis_name(k) - except ValueError: - pass - else: - out[ax] = v - - # All user-provided kwargs have been handled now. - # Now we supplement with positional arguments, emitting warnings - # when there's ambiguity and raising when there's conflicts - - if len(args) == 0: - pass # It's up to the function to decide if this is valid - elif len(args) == 1: - axis = data._get_axis_name(kwargs.get("axis", 0)) - out[axis] = args[0] - elif len(args) == 2: - if "axis" in kwargs: - # Unambiguously wrong - msg = "Cannot specify both 'axis' and any of 'index' or 'columns'" - raise TypeError(msg) - - msg = ( - f"'.{method_name}(a, b)' is ambiguous. Use named keyword arguments" - "for 'index' or 'columns'." - ) - raise TypeError(msg) - else: - msg = f"Cannot specify all of '{arg_name}', 'index', 'columns'." - raise TypeError(msg) - return out - - def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True): """ Validate the keyword arguments to 'fillna'. From 94dd8e6db4de5ca70d6780a3841d266223180fa2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 18 Jan 2023 18:11:58 -0800 Subject: [PATCH 2/9] Push logic into generic, improve docs --- pandas/core/frame.py | 36 ++++++++++++------------ pandas/core/generic.py | 62 +++++++++++++++++++++++++----------------- pandas/core/series.py | 4 +++ 3 files changed, 58 insertions(+), 44 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8190db7e696e0..dd024253d8ce4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -260,6 +260,12 @@ levels and/or index labels.""", "optional_labels": """labels : array-like, optional New labels / index to conform the axis specified by 'axis' to.""", + "optional_index": """index : array-like, optional + New labels for the index. Preferably an Index object to avoid + duplicating data.""", + "optional_column": """columns : array-like, optional + New labels for the columns. Preferably an Index object to avoid + duplicating data.""", "optional_axis": """axis : int or str, optional Axis to target. Can be either the axis name ('index', 'columns') or number (0, 1).""", @@ -4988,8 +4994,15 @@ def set_axis( ) -> DataFrame: return super().set_axis(labels, axis=axis, copy=copy) - @Substitution(**_shared_doc_kwargs) - @Appender(NDFrame.reindex.__doc__) + @doc( + NDFrame.reindex, # type: ignore[has-type] + klass=_shared_doc_kwargs["klass"], + axes=_shared_doc_kwargs["axes"], + optional_labels=_shared_doc_kwargs["optional_labels"], + optional_index=_shared_doc_kwargs["optional_index"], + optional_columns=_shared_doc_kwargs["optional_columns"], + optional_axis=_shared_doc_kwargs["optional_axis"], + ) def reindex( self, labels=None, @@ -5004,26 +5017,11 @@ def reindex( limit: int | None = None, tolerance=None, ) -> DataFrame: - if index is not None and columns is not None and labels is not None: - raise TypeError("Cannot specify all of 'labels', 'index', 'columns'.") - elif index is not None or columns is not None: - if axis is not None: - raise TypeError( - "Cannot specify both 'axis' and any of 'index' or 'columns'" - ) - if labels is not None: - if index is not None: - columns = labels - else: - index = labels - else: - if axis and self._get_axis_number(axis) == 1: - columns = labels - else: - index = labels return super().reindex( + labels=labels, index=index, columns=columns, + axis=axis, method=method, copy=copy, level=level, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ab9b76fbdf712..2694aa615b1d3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -69,6 +69,7 @@ NDFrameT, RandomState, Renamer, + Scalar, SortKind, StorageOptions, Suffixes, @@ -4970,7 +4971,19 @@ def sort_index( optional_labels="", optional_axis="", ) - def reindex(self: NDFrameT, *args, **kwargs) -> NDFrameT: + def reindex( + self: NDFrameT, + labels=None, + index=None, + columns=None, + axis: Axis | None = None, + method: str | None = None, + copy: bool_t | None = None, + level: Level | None = None, + fill_value: Scalar = np.nan, + limit: int | None = None, + tolerance=None, + ) -> NDFrameT: """ Conform {klass} to new index with optional filling logic. @@ -4981,9 +4994,8 @@ def reindex(self: NDFrameT, *args, **kwargs) -> NDFrameT: Parameters ---------- {optional_labels} - {axes} : array-like, optional - New labels / index to conform to, should be specified using - keywords. Preferably an Index object to avoid duplicating data. + {optional_index} + {optional_columns} {optional_axis} method : {{None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}} Method to use for filling holes in reindexed DataFrame. @@ -5173,31 +5185,31 @@ def reindex(self: NDFrameT, *args, **kwargs) -> NDFrameT: # TODO: Decide if we care about having different examples for different # kinds - # construct the args - axes, kwargs = self._construct_axes_from_arguments(args, kwargs) - method = clean_reindex_fill_method(kwargs.pop("method", None)) - level = kwargs.pop("level", None) - copy = kwargs.pop("copy", None) - limit = kwargs.pop("limit", None) - tolerance = kwargs.pop("tolerance", None) - fill_value = kwargs.pop("fill_value", None) - - # Series.reindex doesn't use / need the axis kwarg - # We pop and ignore it here, to make writing Series/Frame generic code - # easier - kwargs.pop("axis", None) - - if kwargs: - raise TypeError( - "reindex() got an unexpected keyword " - f'argument "{list(kwargs.keys())[0]}"' - ) + if index is not None and columns is not None and labels is not None: + raise TypeError("Cannot specify all of 'labels', 'index', 'columns'.") + elif index is not None or columns is not None: + if axis is not None: + raise TypeError( + "Cannot specify both 'axis' and any of 'index' or 'columns'" + ) + if labels is not None: + if index is not None: + columns = labels + else: + index = labels + else: + if axis and self._get_axis_number(axis) == 1: + columns = labels + else: + index = labels + axes = {"index": index, "columns": columns} + method = clean_reindex_fill_method(method) # if all axes that are requested to reindex are equal, then only copy # if indicated must have index names equal here as well as values if all( - self._get_axis(axis).identical(ax) - for axis, ax in axes.items() + self._get_axis(axis_name).identical(ax) + for axis_name, ax in axes.items() if ax is not None ): return self.copy(deep=copy) diff --git a/pandas/core/series.py b/pandas/core/series.py index b042f83fcd1b3..cd4ee2f9c8578 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -193,6 +193,8 @@ "optional_by": "", "optional_mapper": "", "optional_labels": "", + "optional_index": "", + "optional_columns": "", "optional_axis": "", "replace_iloc": """ This differs from updating with ``.loc`` or ``.iloc``, which require @@ -4848,6 +4850,8 @@ def set_axis( klass=_shared_doc_kwargs["klass"], axes=_shared_doc_kwargs["axes"], optional_labels=_shared_doc_kwargs["optional_labels"], + optional_index=_shared_doc_kwargs["optional_index"], + optional_columns=_shared_doc_kwargs["optional_columns"], optional_axis=_shared_doc_kwargs["optional_axis"], ) def reindex( From 22e681f3857a32b761ba6357537f47e29355008b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 18 Jan 2023 18:15:28 -0800 Subject: [PATCH 3/9] Clarify whatsnew --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 7054d93457264..28eacfdfb7e7b 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -698,7 +698,7 @@ Removal of prior version deprecations/changes - Disallow passing non-keyword arguments to :meth:`DataFrame.replace`, :meth:`Series.replace` except for ``to_replace`` and ``value`` (:issue:`47587`) - Disallow passing non-keyword arguments to :meth:`DataFrame.sort_values` except for ``by`` (:issue:`41505`) - Disallow passing non-keyword arguments to :meth:`Series.sort_values` (:issue:`41505`) -- Disallow passing 2 non-keyword arguments to :meth:`DataFrame.reindex` (:issue:`17966`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.reindex` except for ``labels`` (:issue:`17966`) - Disallow :meth:`Index.reindex` with non-unique :class:`Index` objects (:issue:`42568`) - Disallowed constructing :class:`Categorical` with scalar ``data`` (:issue:`38433`) - Disallowed constructing :class:`CategoricalIndex` without passing ``data`` (:issue:`38944`) From cb5114cd18fcfe5c924fff7b00e6e35dd4c448dd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 19 Jan 2023 09:37:09 -0800 Subject: [PATCH 4/9] Fix shared_doc --- pandas/core/frame.py | 2 +- pandas/core/generic.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dd024253d8ce4..7bdc1ecee2760 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -263,7 +263,7 @@ "optional_index": """index : array-like, optional New labels for the index. Preferably an Index object to avoid duplicating data.""", - "optional_column": """columns : array-like, optional + "optional_columns": """columns : array-like, optional New labels for the columns. Preferably an Index object to avoid duplicating data.""", "optional_axis": """axis : int or str, optional diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2694aa615b1d3..460b08b4e01ae 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4969,6 +4969,8 @@ def sort_index( klass=_shared_doc_kwargs["klass"], axes=_shared_doc_kwargs["axes"], optional_labels="", + optional_index="", + optional_columns="", optional_axis="", ) def reindex( From 0dc034d30a558dc5f4b4900e677fa73ba0d268d4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 19 Jan 2023 11:51:15 -0800 Subject: [PATCH 5/9] Make axis more generic --- pandas/core/frame.py | 1 - pandas/core/generic.py | 1 - pandas/core/series.py | 5 +++-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7bdc1ecee2760..9b5ee0c84643b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4997,7 +4997,6 @@ def set_axis( @doc( NDFrame.reindex, # type: ignore[has-type] klass=_shared_doc_kwargs["klass"], - axes=_shared_doc_kwargs["axes"], optional_labels=_shared_doc_kwargs["optional_labels"], optional_index=_shared_doc_kwargs["optional_index"], optional_columns=_shared_doc_kwargs["optional_columns"], diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 460b08b4e01ae..03d71da7be78f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4967,7 +4967,6 @@ def sort_index( @doc( klass=_shared_doc_kwargs["klass"], - axes=_shared_doc_kwargs["axes"], optional_labels="", optional_index="", optional_columns="", diff --git a/pandas/core/series.py b/pandas/core/series.py index cd4ee2f9c8578..9fb81f2560331 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -195,7 +195,8 @@ "optional_labels": "", "optional_index": "", "optional_columns": "", - "optional_axis": "", + "optional_axis": """axis : int or str, optional + Unused.""", "replace_iloc": """ This differs from updating with ``.loc`` or ``.iloc``, which require you to specify a location to update with some value.""", @@ -4848,7 +4849,6 @@ def set_axis( @doc( NDFrame.reindex, # type: ignore[has-type] klass=_shared_doc_kwargs["klass"], - axes=_shared_doc_kwargs["axes"], optional_labels=_shared_doc_kwargs["optional_labels"], optional_index=_shared_doc_kwargs["optional_index"], optional_columns=_shared_doc_kwargs["optional_columns"], @@ -4858,6 +4858,7 @@ def reindex( self, index=None, *, + axis: Axis | None = None, method: str | None = None, copy: bool | None = None, level: Level | None = None, From 7e5d256c9ef8879647f25906988192a8d4dd321e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 19 Jan 2023 13:29:01 -0800 Subject: [PATCH 6/9] Series has different default --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index cde1cc6411912..501d16857e678 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4881,7 +4881,7 @@ def reindex( method: str | None = None, copy: bool | None = None, level: Level | None = None, - fill_value: Scalar = np.nan, + fill_value: Scalar = None, limit: int | None = None, tolerance=None, ) -> Series: From cf55f341b0ea9a92a1f453edf218ed752875249d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 20 Jan 2023 12:15:45 -0800 Subject: [PATCH 7/9] Add some type ignores --- pandas/core/frame.py | 6 +++--- pandas/core/generic.py | 9 ++++++--- pandas/core/groupby/groupby.py | 2 +- pandas/core/series.py | 4 ++-- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 43987071004e0..a7e5028d9c252 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4995,14 +4995,14 @@ def set_axis( return super().set_axis(labels, axis=axis, copy=copy) @doc( - NDFrame.reindex, # type: ignore[has-type] + NDFrame.reindex, klass=_shared_doc_kwargs["klass"], optional_labels=_shared_doc_kwargs["optional_labels"], optional_index=_shared_doc_kwargs["optional_index"], optional_columns=_shared_doc_kwargs["optional_columns"], optional_axis=_shared_doc_kwargs["optional_axis"], ) - def reindex( + def reindex( # type: ignore[override] self, labels=None, *, @@ -5012,7 +5012,7 @@ def reindex( method: str | None = None, copy: bool | None = None, level: Level | None = None, - fill_value: Scalar = np.nan, + fill_value: Scalar | None = np.nan, limit: int | None = None, tolerance=None, ) -> DataFrame: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f8989ffc58e14..54ec79c49c2fb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5119,7 +5119,7 @@ def reindex( method: str | None = None, copy: bool_t | None = None, level: Level | None = None, - fill_value: Scalar = np.nan, + fill_value: Scalar | None = np.nan, limit: int | None = None, tolerance=None, ) -> NDFrameT: @@ -5341,7 +5341,10 @@ def reindex( columns = labels else: index = labels - axes = {"index": index, "columns": columns} + axes: dict[Literal["index", "columns"], Any] = { + "index": index, + "columns": columns, + } method = clean_reindex_fill_method(method) # if all axes that are requested to reindex are equal, then only copy @@ -5530,7 +5533,7 @@ def filter( name = self._get_axis_name(axis) # error: Keywords must be strings return self.reindex( # type: ignore[misc] - **{name: [r for r in items if r in labels]} + **{name: [r for r in items if r in labels]} # type: ignore[arg-type] ) elif like: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c15948ce877a8..c9bfa18e48a60 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4098,7 +4098,7 @@ def _reindex_output( "copy": False, "fill_value": fill_value, } - return output.reindex(**d) + return output.reindex(**d) # type: ignore[arg-type] # GH 13204 # Here, the categorical in-axis groupers, which need to be fully diff --git a/pandas/core/series.py b/pandas/core/series.py index 41ea66e7d630a..53755f604c1f9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4871,7 +4871,7 @@ def set_axis( optional_columns=_shared_doc_kwargs["optional_columns"], optional_axis=_shared_doc_kwargs["optional_axis"], ) - def reindex( + def reindex( # type: ignore[override] self, index=None, *, @@ -4879,7 +4879,7 @@ def reindex( method: str | None = None, copy: bool | None = None, level: Level | None = None, - fill_value: Scalar = None, + fill_value: Scalar | None = None, limit: int | None = None, tolerance=None, ) -> Series: From 70ec2bdb4e78ce14d4ad6a38c2d28cd5a653496f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 23 Jan 2023 11:35:54 -0800 Subject: [PATCH 8/9] Fix docstring validation --- pandas/core/frame.py | 28 +++++++++++++--------------- pandas/core/generic.py | 10 ++-------- pandas/core/series.py | 16 +++++++--------- 3 files changed, 22 insertions(+), 32 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7c8ac61780035..f8fdcbdfd34d4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -258,17 +258,18 @@ levels and/or column labels. - if `axis` is 1 or `'columns'` then `by` may contain column levels and/or index labels.""", - "optional_labels": """labels : array-like, optional - New labels / index to conform the axis specified by 'axis' to.""", - "optional_index": """index : array-like, optional - New labels for the index. Preferably an Index object to avoid - duplicating data.""", - "optional_columns": """columns : array-like, optional - New labels for the columns. Preferably an Index object to avoid - duplicating data.""", - "optional_axis": """axis : int or str, optional - Axis to target. Can be either the axis name ('index', 'columns') - or number (0, 1).""", + "optional_reindex": """ +labels : array-like, optional + New labels / index to conform the axis specified by 'axis' to. +index : array-like, optional + New labels for the index. Preferably an Index object to avoid + duplicating data. +columns : array-like, optional + New labels for the columns. Preferably an Index object to avoid + duplicating data. +axis : int or str, optional + Axis to target. Can be either the axis name ('index', 'columns') + or number (0, 1).""", "replace_iloc": """ This differs from updating with ``.loc`` or ``.iloc``, which require you to specify a location to update with some value.""", @@ -4997,10 +4998,7 @@ def set_axis( @doc( NDFrame.reindex, klass=_shared_doc_kwargs["klass"], - optional_labels=_shared_doc_kwargs["optional_labels"], - optional_index=_shared_doc_kwargs["optional_index"], - optional_columns=_shared_doc_kwargs["optional_columns"], - optional_axis=_shared_doc_kwargs["optional_axis"], + optional_reindex=_shared_doc_kwargs["optional_reindex"], ) def reindex( # type: ignore[override] self, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 54ec79c49c2fb..c06f6d7c85673 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5105,10 +5105,7 @@ def sort_index( @doc( klass=_shared_doc_kwargs["klass"], - optional_labels="", - optional_index="", - optional_columns="", - optional_axis="", + optional_reindex="", ) def reindex( self: NDFrameT, @@ -5132,10 +5129,7 @@ def reindex( Parameters ---------- - {optional_labels} - {optional_index} - {optional_columns} - {optional_axis} + {optional_reindex} method : {{None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}} Method to use for filling holes in reindexed DataFrame. Please note: this is only applicable to DataFrames/Series with a diff --git a/pandas/core/series.py b/pandas/core/series.py index 53755f604c1f9..dd2d9860c06e1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -191,11 +191,12 @@ "duplicated": "Series", "optional_by": "", "optional_mapper": "", - "optional_labels": "", - "optional_index": "", - "optional_columns": "", - "optional_axis": """axis : int or str, optional - Unused.""", + "optional_reindex": """ + index : array-like, optional + New labels for the index. Preferably an Index object to avoid + duplicating data. + axis : int or str, optional + Unused""", "replace_iloc": """ This differs from updating with ``.loc`` or ``.iloc``, which require you to specify a location to update with some value.""", @@ -4866,10 +4867,7 @@ def set_axis( @doc( NDFrame.reindex, # type: ignore[has-type] klass=_shared_doc_kwargs["klass"], - optional_labels=_shared_doc_kwargs["optional_labels"], - optional_index=_shared_doc_kwargs["optional_index"], - optional_columns=_shared_doc_kwargs["optional_columns"], - optional_axis=_shared_doc_kwargs["optional_axis"], + optional_reindex=_shared_doc_kwargs["optional_reindex"], ) def reindex( # type: ignore[override] self, From c6e69e9f993fc4db829338a8d32fbfcbefb3572b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 23 Jan 2023 20:35:17 -0800 Subject: [PATCH 9/9] doc validation --- pandas/core/series.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index dd2d9860c06e1..3d5b44ba52594 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -192,11 +192,11 @@ "optional_by": "", "optional_mapper": "", "optional_reindex": """ - index : array-like, optional - New labels for the index. Preferably an Index object to avoid - duplicating data. - axis : int or str, optional - Unused""", +index : array-like, optional + New labels for the index. Preferably an Index object to avoid + duplicating data. +axis : int or str, optional + Unused.""", "replace_iloc": """ This differs from updating with ``.loc`` or ``.iloc``, which require you to specify a location to update with some value.""",