From 22d53ccb85992dacdb3119f1e66662a8f7c6b63c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Mon, 2 Mar 2020 21:52:21 +0100 Subject: [PATCH 01/41] initial coommit & test --- pandas/core/indexes/base.py | 101 +++++++++++++++++- .../tests/indexes/base_class/test_replace.py | 21 ++++ 2 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 pandas/tests/indexes/base_class/test_replace.py diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 057adceda7efd..0c6ae375a733d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -7,7 +7,7 @@ import numpy as np -from pandas._libs import algos as libalgos, index as libindex, lib +from pandas._libs import Timedelta, algos as libalgos, index as libindex, lib import pandas._libs.join as libjoin from pandas._libs.lib import is_datetime_array, no_default from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp @@ -32,6 +32,11 @@ is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype, +<<<<<<< HEAD +======= + is_datetime64tz_dtype, + is_dict_like, +>>>>>>> initial coommit & test is_dtype_equal, is_extension_array_dtype, is_float, @@ -1351,6 +1356,100 @@ def rename(self, name, inplace=False): """ return self.set_names([name], inplace=inplace) + def replace( + self, + to_replace=None, + value=None, + inplace=False, + limit=None, + regex=False, + method="pad", + ): + if inplace: + raise NotImplementedError("Can't perform inplace operation on Index.") + if not is_bool(regex) and to_replace is not None: + raise AssertionError("'to_replace' must be 'None' if 'regex' is not a bool") + + if value is None: + raise NotImplementedError() + else: + if is_dict_like(to_replace): + raise NotImplementedError() + elif is_list_like(to_replace): + if is_list_like(value): + if len(to_replace) != len(value): + # NOTE: Corresponding error message in core.generic.replace + # is not clear. Let's decide on one. 
+ raise ValueError( + f"Length of `to_replace=` ({len(to_replace)}) should " + f"match length of `value=` ({len(value)})." + ) + if regex: + raise NotImplementedError() + + # copied method from BlockManager(), temporarily here + def replace_list( + self, values, src_list, dest_list, inplace=False, regex=False + ): + from pandas.core.internals.managers import ( + _compare_or_regex_search, + ) + from pandas.core.internals.managers import maybe_convert_objects + + if inplace: + raise NotImplementedError( + "Can't perform inplace operation on Index." + ) + + if regex: + raise NotImplementedError("TODO.") + + def comp(s, regex=False): + """ + Generate a bool array by perform an equality check, + or perform an element-wise regular expression + matching. + """ + if isna(s): + return isna(values) + if ( + isinstance(s, (Timedelta, Timestamp)) + and getattr(s, "tz", None) is None + ): + + return _compare_or_regex_search( + maybe_convert_objects(values), s.asm8, regex + ) + return _compare_or_regex_search(values, s, regex) + + masks = [comp(s, regex) for s in src_list] + + new_index = self.copy() # NOTE: no inplace, right? 
+ zipped = zip(src_list, dest_list) + for i, (_, dest) in enumerate(zipped): + m = masks[i] + if m.any(): + new_index = new_index.putmask(mask=m, value=dest) + + return new_index + + new_index = replace_list( + self=self, + values=self.values, + src_list=to_replace, + dest_list=value, + inplace=inplace, + regex=regex, + ) + + else: + raise NotImplementedError() + elif to_replace is None: + raise NotImplementedError() + + # import ipdb; ipdb.set_trace() + return new_index + # -------------------------------------------------------------------- # Level-Centric Methods diff --git a/pandas/tests/indexes/base_class/test_replace.py b/pandas/tests/indexes/base_class/test_replace.py new file mode 100644 index 0000000000000..c2698bd09b21b --- /dev/null +++ b/pandas/tests/indexes/base_class/test_replace.py @@ -0,0 +1,21 @@ +# import pandas as pd +import pandas._testing as tm + + +def test_index_replace(): + index = pd.Index([1, 2, 3]) + expected = pd.Index(["a", 2, "c"]) + + result = index.replace([1, 3], ["a", "c"]) + + tm.assert_equal(result, expected) + + +if __name__ == "__main__": + # %load_ext autoreload + # %autoreload 2 + + import pandas as pd + + index = pd.Index([1, 2, 3]) + index.replace([1, 2], ["a", "b"]) From 47a14fa6be39e952a1c6efe1bee2f646e35308f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Mon, 2 Mar 2020 22:09:53 +0100 Subject: [PATCH 02/41] Initial commit & test --- pandas/core/indexes/base.py | 40 ++++++++++++++++++- .../tests/indexes/base_class/test_replace.py | 13 ++++-- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0c6ae375a733d..e8443102c6693 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1371,10 +1371,46 @@ def replace( raise AssertionError("'to_replace' must be 'None' if 'regex' is not a bool") if value is None: - raise NotImplementedError() + if not is_dict_like(to_replace) and not 
is_dict_like(regex): + raise NotImplementedError() + + if isinstance(to_replace, (tuple, list)): + raise NotImplementedError() + + if not is_dict_like(to_replace): + raise NotImplementedError() + + items = list(to_replace.items()) + keys, values = zip(*items) if items else ([], []) + + are_mappings = [is_dict_like(v) for v in values] + + if any(are_mappings): + raise NotImplementedError() + else: + to_replace, value = keys, values + + return self.replace( + to_replace=to_replace, + value=value, + inplace=inplace, + limit=limit, + regex=regex, + ) + else: if is_dict_like(to_replace): - raise NotImplementedError() + if is_dict_like(value): + raise NotImplementedError( + "This won't happen for an Index, I think." + ) + elif not is_list_like(value): + raise NotImplementedError( + "This won't happen for an Index, I think." + ) + else: + raise TypeError("value argument must be scalar, dict, or Series") + elif is_list_like(to_replace): if is_list_like(value): if len(to_replace) != len(value): diff --git a/pandas/tests/indexes/base_class/test_replace.py b/pandas/tests/indexes/base_class/test_replace.py index c2698bd09b21b..060a460d94827 100644 --- a/pandas/tests/indexes/base_class/test_replace.py +++ b/pandas/tests/indexes/base_class/test_replace.py @@ -1,4 +1,4 @@ -# import pandas as pd +import pandas as pd import pandas._testing as tm @@ -11,11 +11,16 @@ def test_index_replace(): tm.assert_equal(result, expected) +def test_index_replace_2(): + index = pd.Index([1, 2, 3]) + expected = pd.Index(["a", 2, "c"]) + + result = index.replace({1: "a", 3: "c"}) + tm.assert_equal(result, expected) + + if __name__ == "__main__": # %load_ext autoreload # %autoreload 2 - - import pandas as pd - index = pd.Index([1, 2, 3]) index.replace([1, 2], ["a", "b"]) From fbbc7455562db7d9f023beb95ceb80e1601a9c97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Mon, 2 Mar 2020 22:26:47 +0100 Subject: [PATCH 03/41] Third case, not implementing --- 
pandas/core/indexes/base.py | 7 ++++++- pandas/tests/indexes/base_class/test_replace.py | 9 +++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e8443102c6693..d88f86f3b1cd9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1372,7 +1372,12 @@ def replace( if value is None: if not is_dict_like(to_replace) and not is_dict_like(regex): - raise NotImplementedError() + raise NotImplementedError( + "This is implemented in NDFrame.replace(). However," + "not clear if we should include this in the API." + "See issue 5319 and PR 5600. But also note that this" + "use is not mentioned in the docs." + ) if isinstance(to_replace, (tuple, list)): raise NotImplementedError() diff --git a/pandas/tests/indexes/base_class/test_replace.py b/pandas/tests/indexes/base_class/test_replace.py index 060a460d94827..651ec4e5dcdca 100644 --- a/pandas/tests/indexes/base_class/test_replace.py +++ b/pandas/tests/indexes/base_class/test_replace.py @@ -1,3 +1,6 @@ +import numpy as np +import pytest + import pandas as pd import pandas._testing as tm @@ -19,6 +22,12 @@ def test_index_replace_2(): tm.assert_equal(result, expected) +def test_index_replace_3(): + index = pd.Index([1, None, 2]) + with pytest.raises(NotImplementedError): + index.replace(np.nan) + + if __name__ == "__main__": # %load_ext autoreload # %autoreload 2 From 5cd5d6575fd93a098c5254dacb4c351b34fea5fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Wed, 4 Mar 2020 20:43:14 +0100 Subject: [PATCH 04/41] replace values list with scalar --- pandas/core/indexes/base.py | 4 +++- pandas/tests/indexes/base_class/test_replace.py | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d88f86f3b1cd9..86de66e17df6a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1484,7 +1484,9 @@ def 
comp(s, regex=False): ) else: - raise NotImplementedError() + mask = missing.mask_missing(self.values, to_replace) + new_index = self.putmask(mask, value) + elif to_replace is None: raise NotImplementedError() diff --git a/pandas/tests/indexes/base_class/test_replace.py b/pandas/tests/indexes/base_class/test_replace.py index 651ec4e5dcdca..2a2874d11134e 100644 --- a/pandas/tests/indexes/base_class/test_replace.py +++ b/pandas/tests/indexes/base_class/test_replace.py @@ -28,6 +28,14 @@ def test_index_replace_3(): index.replace(np.nan) +def test_index_replace_4(): + index = pd.Index([1, None, 2]) + expected = pd.Index(["a", None, "a"]) + + result = index.replace([1, 2], "a") + tm.assert_equal(expected, result) + + if __name__ == "__main__": # %load_ext autoreload # %autoreload 2 From 734c750af66dcd0e4245840670476a9fabaf5742 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Wed, 4 Mar 2020 21:19:53 +0100 Subject: [PATCH 05/41] backfill case --- pandas/core/indexes/base.py | 14 +++++++++++--- pandas/tests/indexes/base_class/test_replace.py | 8 ++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 86de66e17df6a..fec44d6f4a009 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1371,6 +1371,17 @@ def replace( raise AssertionError("'to_replace' must be 'None' if 'regex' is not a bool") if value is None: + if isinstance(to_replace, (tuple, list)): + fill_f = missing.get_fill_func(method) + mask = missing.mask_missing(self.values, to_replace) + new_index = fill_f(self.values, limit=limit, mask=mask) + new_index = new_index.astype(self.dtype) + + # analogous conditional returns in NDFrame.replace() + # it feels somewhat inconsistent since this is the only + # condition where there's a return, but well... 
+ return self._constructor(new_index) + if not is_dict_like(to_replace) and not is_dict_like(regex): raise NotImplementedError( "This is implemented in NDFrame.replace(). However," @@ -1379,9 +1390,6 @@ def replace( "use is not mentioned in the docs." ) - if isinstance(to_replace, (tuple, list)): - raise NotImplementedError() - if not is_dict_like(to_replace): raise NotImplementedError() diff --git a/pandas/tests/indexes/base_class/test_replace.py b/pandas/tests/indexes/base_class/test_replace.py index 2a2874d11134e..41b3c4b2587e9 100644 --- a/pandas/tests/indexes/base_class/test_replace.py +++ b/pandas/tests/indexes/base_class/test_replace.py @@ -36,6 +36,14 @@ def test_index_replace_4(): tm.assert_equal(expected, result) +def test_index_replace_5(): + index = pd.Index([0, 1, 2, 3, 4]) + expected = pd.Index([0, 3, 3, 3, 4]) + + result = index.replace([1, 2], method="bfill") + tm.assert_equal(expected, result) + + if __name__ == "__main__": # %load_ext autoreload # %autoreload 2 From becbc090e705a9602f336058e22b7ba1388421c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sat, 7 Mar 2020 13:18:36 +0100 Subject: [PATCH 06/41] Add replace_single and regex cases --- pandas/core/indexes/base.py | 148 ++++++++++++++++-- .../tests/indexes/base_class/test_replace.py | 33 ++++ 2 files changed, 171 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index fec44d6f4a009..904e9faa219e3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1,6 +1,7 @@ from copy import copy as copy_func from datetime import datetime import operator +import re from textwrap import dedent from typing import TYPE_CHECKING, Any, Callable, FrozenSet, Hashable, Optional, Union import warnings @@ -67,6 +68,7 @@ ABCSeries, ABCTimedeltaIndex, ) +from pandas.core.dtypes.inference import is_re, is_re_compilable from pandas.core.dtypes.missing import array_equivalent, isna from pandas.core import ops 
@@ -1367,6 +1369,7 @@ def replace( ): if inplace: raise NotImplementedError("Can't perform inplace operation on Index.") + if not is_bool(regex) and to_replace is not None: raise AssertionError("'to_replace' must be 'None' if 'regex' is not a bool") @@ -1383,6 +1386,7 @@ def replace( return self._constructor(new_index) if not is_dict_like(to_replace) and not is_dict_like(regex): + # to_replace = [to_replace] raise NotImplementedError( "This is implemented in NDFrame.replace(). However," "not clear if we should include this in the API." @@ -1391,7 +1395,14 @@ def replace( ) if not is_dict_like(to_replace): - raise NotImplementedError() + if not is_dict_like(regex): + raise TypeError( + 'If "to_replace" and "value" are both None ' + 'and "to_replace" is not a list, then ' + "regex must be a mapping" + ) + to_replace = regex + regex = True items = list(to_replace.items()) keys, values = zip(*items) if items else ([], []) @@ -1423,7 +1434,6 @@ def replace( ) else: raise TypeError("value argument must be scalar, dict, or Series") - elif is_list_like(to_replace): if is_list_like(value): if len(to_replace) != len(value): @@ -1433,8 +1443,6 @@ def replace( f"Length of `to_replace=` ({len(to_replace)}) should " f"match length of `value=` ({len(value)})." ) - if regex: - raise NotImplementedError() # copied method from BlockManager(), temporarily here def replace_list( @@ -1450,9 +1458,6 @@ def replace_list( "Can't perform inplace operation on Index." 
) - if regex: - raise NotImplementedError("TODO.") - def comp(s, regex=False): """ Generate a bool array by perform an equality check, @@ -1494,13 +1499,136 @@ def comp(s, regex=False): else: mask = missing.mask_missing(self.values, to_replace) new_index = self.putmask(mask, value) - elif to_replace is None: - raise NotImplementedError() + if is_re_compilable(regex): + to_replace = regex + regex = True + new_index = self.replace( + to_replace=to_replace, + value=value, + inplace=inplace, + limit=limit, + regex=regex, + method=method, + ) + else: + new_index = self._replace_single( + to_replace=to_replace, + value=value, + inplace=inplace, + filter=None, + regex=regex, + ) - # import ipdb; ipdb.set_trace() return new_index + def _replace_single( + self, + to_replace, + value, + inplace=False, + filter=None, + regex=False, + convert=True, + mask=None, + ): + """ + Replace elements by the given value. + + Parameters + ---------- + to_replace : object or pattern + Scalar to replace or regular expression to match. + value : object + Replacement object. + inplace : bool, default False + Perform inplace modification. + filter : list, optional + regex : bool, default False + If true, perform regular expression substitution. + convert : bool, default True + If true, try to coerce any object types to better types. + mask : array-like of bool, optional + True indicate corresponding element is ignored. 
+ + Returns + ------- + a new block, the result after replacing + """ + # inplace = validate_bool_kwarg(inplace, "inplace") + # import ipdb; ipdb.set_trace() + + # to_replace is regex compilable + to_rep_re = regex and is_re_compilable(to_replace) + + # regex is regex compilable + regex_re = is_re_compilable(regex) + + # only one will survive + if to_rep_re and regex_re: + raise AssertionError( + "only one of to_replace and regex can be regex compilable" + ) + + # if regex was passed as something that can be a regex (rather than a + # boolean) + if regex_re: + to_replace = regex + + regex = regex_re or to_rep_re + + # try to get the pattern attribute (compiled re) or it's a string + if is_re(to_replace): + pattern = to_replace.pattern + else: + pattern = to_replace + + # if the pattern is not empty and to_replace is either a string or a + # regex + if regex and pattern: + rx = re.compile(to_replace) + else: + # if the thing to replace is not a string or compiled regex call + # the superclass method -> to_replace is some kind of object + return self.replace( + to_replace=[to_replace], value=[value], inplace=inplace, regex=regex, + ) + + new_values = self.values if inplace else self.values.copy() + + # deal with replacing values with objects (strings) that match but + # whose replacement is not a string (numeric, nan, object) + if isna(value) or not isinstance(value, str): + + def re_replacer(s): + if is_re(rx) and isinstance(s, str): + return value if rx.search(s) is not None else s + else: + return s + + else: + # value is guaranteed to be a string here, s can be either a string + # or null if it's null it gets returned + def re_replacer(s): + if is_re(rx) and isinstance(s, str): + return rx.sub(value, s) + else: + return s + + f = np.vectorize(re_replacer, otypes=[self.dtype]) + + if filter is None: + filt = slice(None) + else: + filt = self.mgr_locs.isin(filter).nonzero()[0] + + if mask is None: + new_values[filt] = f(new_values[filt]) + else: + 
new_values[filt][mask] = f(new_values[filt][mask]) + + return self._constructor(new_values) + # -------------------------------------------------------------------- # Level-Centric Methods diff --git a/pandas/tests/indexes/base_class/test_replace.py b/pandas/tests/indexes/base_class/test_replace.py index 41b3c4b2587e9..419679b32303f 100644 --- a/pandas/tests/indexes/base_class/test_replace.py +++ b/pandas/tests/indexes/base_class/test_replace.py @@ -14,6 +14,15 @@ def test_index_replace(): tm.assert_equal(result, expected) +def test_index_replace_1(): + index = pd.Index([1, 2, 3]) + expected = pd.Index(["a", 2, 3]) + + result = index.replace(1, "a") + + tm.assert_equal(result, expected) + + def test_index_replace_2(): index = pd.Index([1, 2, 3]) expected = pd.Index(["a", 2, "c"]) @@ -44,6 +53,30 @@ def test_index_replace_5(): tm.assert_equal(expected, result) +def test_index_replace_6(): + index = pd.Index(["bat", "foo", "baait", "bar"]) + expected = pd.Index(["new", "foo", "baait", "new"]) + + result = index.replace(to_replace=r"^ba.$", value="new", regex=True) + tm.assert_equal(expected, result) + + +def test_index_replace_7(): + index = pd.Index(["bat", "foo", "baait", "bar"]) + expected = pd.Index(["new", "foo", "baait", "new"]) + + result = index.replace(regex=r"^ba.$", value="new") + tm.assert_equal(expected, result) + + +def test_index_replace_8(): + index = pd.Index(["bat", "foo", "baait", "bar"]) + expected = pd.Index(["new", "xyz", "baait", "new"]) + + result = index.replace(regex={r"^ba.$": "new", "foo": "xyz"}) + tm.assert_equal(expected, result) + + if __name__ == "__main__": # %load_ext autoreload # %autoreload 2 From ae20f640499cfb9eb1cd7028fd4505e78628d5c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 8 Mar 2020 13:03:52 +0100 Subject: [PATCH 07/41] Not implementing inplace in light of #16529. 
--- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 904e9faa219e3..f19806f767fb9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1368,7 +1368,7 @@ def replace( method="pad", ): if inplace: - raise NotImplementedError("Can't perform inplace operation on Index.") + raise TypeError("Index can't be updated inplace.") if not is_bool(regex) and to_replace is not None: raise AssertionError("'to_replace' must be 'None' if 'regex' is not a bool") From 4fa5f11d76ce14fd58c3ab1c62a4d7eddbd0d972 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 8 Mar 2020 13:39:18 +0100 Subject: [PATCH 08/41] Clean unnecessary errors --- pandas/core/indexes/base.py | 108 +++++++----------- .../tests/indexes/base_class/test_replace.py | 7 ++ 2 files changed, 47 insertions(+), 68 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f19806f767fb9..acaff2b4b74e8 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1380,13 +1380,9 @@ def replace( new_index = fill_f(self.values, limit=limit, mask=mask) new_index = new_index.astype(self.dtype) - # analogous conditional returns in NDFrame.replace() - # it feels somewhat inconsistent since this is the only - # condition where there's a return, but well... return self._constructor(new_index) if not is_dict_like(to_replace) and not is_dict_like(regex): - # to_replace = [to_replace] raise NotImplementedError( "This is implemented in NDFrame.replace(). However," "not clear if we should include this in the API." 
@@ -1407,12 +1403,7 @@ def replace( items = list(to_replace.items()) keys, values = zip(*items) if items else ([], []) - are_mappings = [is_dict_like(v) for v in values] - - if any(are_mappings): - raise NotImplementedError() - else: - to_replace, value = keys, values + to_replace, value = keys, values return self.replace( to_replace=to_replace, @@ -1424,17 +1415,8 @@ def replace( else: if is_dict_like(to_replace): - if is_dict_like(value): - raise NotImplementedError( - "This won't happen for an Index, I think." - ) - elif not is_list_like(value): - raise NotImplementedError( - "This won't happen for an Index, I think." - ) - else: - raise TypeError("value argument must be scalar, dict, or Series") - elif is_list_like(to_replace): + raise TypeError("If `to_replace` is a dict, `value` should be None.") + if is_list_like(to_replace): if is_list_like(value): if len(to_replace) != len(value): # NOTE: Corresponding error message in core.generic.replace @@ -1444,51 +1426,7 @@ def replace( f"match length of `value=` ({len(value)})." ) - # copied method from BlockManager(), temporarily here - def replace_list( - self, values, src_list, dest_list, inplace=False, regex=False - ): - from pandas.core.internals.managers import ( - _compare_or_regex_search, - ) - from pandas.core.internals.managers import maybe_convert_objects - - if inplace: - raise NotImplementedError( - "Can't perform inplace operation on Index." - ) - - def comp(s, regex=False): - """ - Generate a bool array by perform an equality check, - or perform an element-wise regular expression - matching. - """ - if isna(s): - return isna(values) - if ( - isinstance(s, (Timedelta, Timestamp)) - and getattr(s, "tz", None) is None - ): - - return _compare_or_regex_search( - maybe_convert_objects(values), s.asm8, regex - ) - return _compare_or_regex_search(values, s, regex) - - masks = [comp(s, regex) for s in src_list] - - new_index = self.copy() # NOTE: no inplace, right? 
- zipped = zip(src_list, dest_list) - for i, (_, dest) in enumerate(zipped): - m = masks[i] - if m.any(): - new_index = new_index.putmask(mask=m, value=dest) - - return new_index - - new_index = replace_list( - self=self, + new_index = self.replace_list( values=self.values, src_list=to_replace, dest_list=value, @@ -1522,6 +1460,40 @@ def comp(s, regex=False): return new_index + def replace_list(self, values, src_list, dest_list, inplace=False, regex=False): + from pandas.core.internals.managers import _compare_or_regex_search + from pandas.core.internals.managers import maybe_convert_objects + + if inplace: + raise TypeError("Index can't be updated inplace.") + + def comp(s, regex=False): + """ + Generate a bool array by perform an equality check, + or perform an element-wise regular expression + matching. + """ + if isna(s): + return isna(values) + if isinstance(s, (Timedelta, Timestamp)) and getattr(s, "tz", None) is None: + + return _compare_or_regex_search( + maybe_convert_objects(values), s.asm8, regex + ) + return _compare_or_regex_search(values, s, regex) + + masks = [comp(s, regex) for s in src_list] + + # This is never inplace. 
+ new_index = self.copy() + zipped = zip(src_list, dest_list) + for i, (_, dest) in enumerate(zipped): + m = masks[i] + if m.any(): + new_index = new_index.putmask(mask=m, value=dest) + + return new_index + def _replace_single( self, to_replace, @@ -1555,8 +1527,8 @@ def _replace_single( ------- a new block, the result after replacing """ - # inplace = validate_bool_kwarg(inplace, "inplace") - # import ipdb; ipdb.set_trace() + if inplace: + raise TypeError("Index can't be updated inplace.") # to_replace is regex compilable to_rep_re = regex and is_re_compilable(to_replace) diff --git a/pandas/tests/indexes/base_class/test_replace.py b/pandas/tests/indexes/base_class/test_replace.py index 419679b32303f..8a9f22e4cd40d 100644 --- a/pandas/tests/indexes/base_class/test_replace.py +++ b/pandas/tests/indexes/base_class/test_replace.py @@ -31,6 +31,13 @@ def test_index_replace_2(): tm.assert_equal(result, expected) +def test_index_replace_2_1(): + index = pd.Index([1, 2, 3]) + + with pytest.raises(TypeError): + index.replace({1: "a", 3: "c"}, "x") + + def test_index_replace_3(): index = pd.Index([1, None, 2]) with pytest.raises(NotImplementedError): From 4d3ec7e6197cf372658bd9bdb2580097731dc43f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 8 Mar 2020 13:42:42 +0100 Subject: [PATCH 09/41] remove unused inplace parameters/arguments --- pandas/core/indexes/base.py | 31 +++++-------------------------- 1 file changed, 5 insertions(+), 26 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index acaff2b4b74e8..273b604660532 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1430,7 +1430,6 @@ def replace( values=self.values, src_list=to_replace, dest_list=value, - inplace=inplace, regex=regex, ) @@ -1451,22 +1450,15 @@ def replace( ) else: new_index = self._replace_single( - to_replace=to_replace, - value=value, - inplace=inplace, - filter=None, - regex=regex, + 
to_replace=to_replace, value=value, filter=None, regex=regex, ) return new_index - def replace_list(self, values, src_list, dest_list, inplace=False, regex=False): + def replace_list(self, values, src_list, dest_list, regex=False): from pandas.core.internals.managers import _compare_or_regex_search from pandas.core.internals.managers import maybe_convert_objects - if inplace: - raise TypeError("Index can't be updated inplace.") - def comp(s, regex=False): """ Generate a bool array by perform an equality check, @@ -1495,14 +1487,7 @@ def comp(s, regex=False): return new_index def _replace_single( - self, - to_replace, - value, - inplace=False, - filter=None, - regex=False, - convert=True, - mask=None, + self, to_replace, value, filter=None, regex=False, convert=True, mask=None, ): """ Replace elements by the given value. @@ -1513,8 +1498,6 @@ def _replace_single( Scalar to replace or regular expression to match. value : object Replacement object. - inplace : bool, default False - Perform inplace modification. filter : list, optional regex : bool, default False If true, perform regular expression substitution. 
@@ -1527,8 +1510,6 @@ def _replace_single( ------- a new block, the result after replacing """ - if inplace: - raise TypeError("Index can't be updated inplace.") # to_replace is regex compilable to_rep_re = regex and is_re_compilable(to_replace) @@ -1562,11 +1543,9 @@ def _replace_single( else: # if the thing to replace is not a string or compiled regex call # the superclass method -> to_replace is some kind of object - return self.replace( - to_replace=[to_replace], value=[value], inplace=inplace, regex=regex, - ) + return self.replace(to_replace=[to_replace], value=[value], regex=regex,) - new_values = self.values if inplace else self.values.copy() + new_values = self.values.copy() # deal with replacing values with objects (strings) that match but # whose replacement is not a string (numeric, nan, object) From 1737372162575d84c2305ff9f8cfe16e1e07e24f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 8 Mar 2020 13:46:50 +0100 Subject: [PATCH 10/41] remove filter from _replace_single() method() --- pandas/core/indexes/base.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 273b604660532..91dae717b99c3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1450,7 +1450,7 @@ def replace( ) else: new_index = self._replace_single( - to_replace=to_replace, value=value, filter=None, regex=regex, + to_replace=to_replace, value=value, regex=regex, ) return new_index @@ -1487,7 +1487,7 @@ def comp(s, regex=False): return new_index def _replace_single( - self, to_replace, value, filter=None, regex=False, convert=True, mask=None, + self, to_replace, value, regex=False, convert=True, mask=None, ): """ Replace elements by the given value. @@ -1498,7 +1498,6 @@ def _replace_single( Scalar to replace or regular expression to match. value : object Replacement object. 
- filter : list, optional regex : bool, default False If true, perform regular expression substitution. convert : bool, default True @@ -1568,15 +1567,10 @@ def re_replacer(s): f = np.vectorize(re_replacer, otypes=[self.dtype]) - if filter is None: - filt = slice(None) - else: - filt = self.mgr_locs.isin(filter).nonzero()[0] - if mask is None: - new_values[filt] = f(new_values[filt]) + new_values = f(new_values) else: - new_values[filt][mask] = f(new_values[filt][mask]) + new_values[mask] = f(new_values[mask]) return self._constructor(new_values) From 554ce48b38772e4682ae049049eff955e2484a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 8 Mar 2020 13:47:42 +0100 Subject: [PATCH 11/41] Remove unnecessary comment --- pandas/core/indexes/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 91dae717b99c3..b1a567ace94ed 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1476,7 +1476,6 @@ def comp(s, regex=False): masks = [comp(s, regex) for s in src_list] - # This is never inplace. 
new_index = self.copy() zipped = zip(src_list, dest_list) for i, (_, dest) in enumerate(zipped): From 09f3a1513806cc12e0b09e8dec857b465e9583b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 8 Mar 2020 14:09:53 +0100 Subject: [PATCH 12/41] Add documentation to replace_list --- pandas/core/indexes/base.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b1a567ace94ed..f0b6f48248c88 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1427,10 +1427,7 @@ def replace( ) new_index = self.replace_list( - values=self.values, - src_list=to_replace, - dest_list=value, - regex=regex, + src_list=to_replace, dest_list=value, regex=regex, ) else: @@ -1455,7 +1452,26 @@ def replace( return new_index - def replace_list(self, values, src_list, dest_list, regex=False): + def replace_list(self, src_list, dest_list, regex=False): + """ + Replace elements of the index that are found in the `src_list` with the + elements in `dest_list`. + + Parameters + ---------- + src_list : list + List of elements to be replaced. + dest_list : list + List of elements to be replaced. + regex : bool, default False + If true, perform regular expression substitution. + + Returns + ------- + Index + The same type as the caller. + """ + from pandas.core.internals.managers import _compare_or_regex_search from pandas.core.internals.managers import maybe_convert_objects @@ -1466,13 +1482,13 @@ def comp(s, regex=False): matching. 
""" if isna(s): - return isna(values) + return isna(self.values) if isinstance(s, (Timedelta, Timestamp)) and getattr(s, "tz", None) is None: return _compare_or_regex_search( - maybe_convert_objects(values), s.asm8, regex + maybe_convert_objects(self.values), s.asm8, regex ) - return _compare_or_regex_search(values, s, regex) + return _compare_or_regex_search(self.values, s, regex) masks = [comp(s, regex) for s in src_list] From f81efc1f1a1dbf0baa6584f22f34c071f59a3b2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 8 Mar 2020 14:26:45 +0100 Subject: [PATCH 13/41] Fix import --- pandas/core/indexes/base.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f0b6f48248c88..2ba5fb3c913a9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -33,11 +33,7 @@ is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype, -<<<<<<< HEAD -======= - is_datetime64tz_dtype, is_dict_like, ->>>>>>> initial coommit & test is_dtype_equal, is_extension_array_dtype, is_float, From e1711d85c3230fada287be8cfca289d3a9976bfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 8 Mar 2020 17:48:35 +0100 Subject: [PATCH 14/41] Address minor comments - i --- pandas/core/indexes/base.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2ba5fb3c913a9..ebc31914a34b6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -76,6 +76,10 @@ import pandas.core.common as com from pandas.core.indexers import deprecate_ndim_indexing from pandas.core.indexes.frozen import FrozenList +from pandas.core.internals.managers import ( + _compare_or_regex_search, + maybe_convert_objects, +) import pandas.core.missing as missing from pandas.core.ops import get_op_result_name from pandas.core.ops.invalid import make_invalid_op @@ -1389,8 
+1393,8 @@ def replace( if not is_dict_like(to_replace): if not is_dict_like(regex): raise TypeError( - 'If "to_replace" and "value" are both None ' - 'and "to_replace" is not a list, then ' + "If 'to_replace' and 'value' are both None " + "and 'to_replace' is not a list, then " "regex must be a mapping" ) to_replace = regex @@ -1411,7 +1415,7 @@ def replace( else: if is_dict_like(to_replace): - raise TypeError("If `to_replace` is a dict, `value` should be None.") + raise TypeError("If 'to_replace' is a dict, 'value' should be None.") if is_list_like(to_replace): if is_list_like(value): if len(to_replace) != len(value): @@ -1468,9 +1472,6 @@ def replace_list(self, src_list, dest_list, regex=False): The same type as the caller. """ - from pandas.core.internals.managers import _compare_or_regex_search - from pandas.core.internals.managers import maybe_convert_objects - def comp(s, regex=False): """ Generate a bool array by perform an equality check, From 1ebc201f904ff1744420acbf58c9c0cbd32ff2c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 8 Mar 2020 18:00:22 +0100 Subject: [PATCH 15/41] Revert moving imports to import section --- pandas/core/indexes/base.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ebc31914a34b6..369976609ad13 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -76,10 +76,6 @@ import pandas.core.common as com from pandas.core.indexers import deprecate_ndim_indexing from pandas.core.indexes.frozen import FrozenList -from pandas.core.internals.managers import ( - _compare_or_regex_search, - maybe_convert_objects, -) import pandas.core.missing as missing from pandas.core.ops import get_op_result_name from pandas.core.ops.invalid import make_invalid_op @@ -1478,6 +1474,12 @@ def comp(s, regex=False): or perform an element-wise regular expression matching. 
""" + + from pandas.core.internals.managers import ( + _compare_or_regex_search, + maybe_convert_objects, + ) + if isna(s): return isna(self.values) if isinstance(s, (Timedelta, Timestamp)) and getattr(s, "tz", None) is None: From f93adb8ece287aeb35d5d9a03763d92d0e587d70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 8 Mar 2020 18:15:26 +0100 Subject: [PATCH 16/41] Minor comments and parametrized tests --- pandas/core/indexes/base.py | 4 +- .../tests/indexes/base_class/test_replace.py | 106 +++++++----------- 2 files changed, 45 insertions(+), 65 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 369976609ad13..ffeae2b651276 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1418,8 +1418,8 @@ def replace( # NOTE: Corresponding error message in core.generic.replace # is not clear. Let's decide on one. raise ValueError( - f"Length of `to_replace=` ({len(to_replace)}) should " - f"match length of `value=` ({len(value)})." + f"Length of 'to_replace=' ({len(to_replace)}) should " + f"match length of 'value=' ({len(value)})." 
) new_index = self.replace_list( diff --git a/pandas/tests/indexes/base_class/test_replace.py b/pandas/tests/indexes/base_class/test_replace.py index 8a9f22e4cd40d..a71de2b54f234 100644 --- a/pandas/tests/indexes/base_class/test_replace.py +++ b/pandas/tests/indexes/base_class/test_replace.py @@ -5,87 +5,67 @@ import pandas._testing as tm -def test_index_replace(): - index = pd.Index([1, 2, 3]) - expected = pd.Index(["a", 2, "c"]) - - result = index.replace([1, 3], ["a", "c"]) - - tm.assert_equal(result, expected) - - -def test_index_replace_1(): - index = pd.Index([1, 2, 3]) - expected = pd.Index(["a", 2, 3]) - - result = index.replace(1, "a") +@pytest.mark.parametrize( + "index, to_replace, value, expected", + [ + ([1, 2, 3], [1, 3], ["a", "c"], ["a", 2, "c"]), + ([1, 2, 3], 1, "a", ["a", 2, 3]), + ([1, None, 2], [1, 2], "a", ["a", None, "a"],), + ], +) +def test_index_replace(index, to_replace, value, expected): + index = pd.Index(index) + expected = pd.Index(expected) + + result = index.replace(to_replace=to_replace, value=value) tm.assert_equal(result, expected) -def test_index_replace_2(): - index = pd.Index([1, 2, 3]) - expected = pd.Index(["a", 2, "c"]) - - result = index.replace({1: "a", 3: "c"}) - tm.assert_equal(result, expected) +@pytest.mark.parametrize( + "index, to_replace, value, regex, expected", + [ + ( + ["bat", "foo", "baait", "bar"], + r"^ba.$", + "new", + True, + ["new", "foo", "baait", "new"], + ), + ( + ["bat", "foo", "baait", "bar"], + None, + None, + {r"^ba.$": "new", "foo": "xyz"}, + ["new", "xyz", "baait", "new"], + ), + ], +) +def test_index_replace_regex(index, to_replace, value, regex, expected): + index = pd.Index(index) + expected = pd.Index(expected) + + result = index.replace(to_replace=to_replace, value=value, regex=regex) + tm.assert_equal(expected, result) -def test_index_replace_2_1(): +def test_index_replace_dict_and_value(): index = pd.Index([1, 2, 3]) - with pytest.raises(TypeError): + msg = "If 'to_replace' is a dict, 
'value' should be None." + with pytest.raises(TypeError, match=msg): index.replace({1: "a", 3: "c"}, "x") -def test_index_replace_3(): +def test_index_replace_scalar_only(): index = pd.Index([1, None, 2]) with pytest.raises(NotImplementedError): index.replace(np.nan) -def test_index_replace_4(): - index = pd.Index([1, None, 2]) - expected = pd.Index(["a", None, "a"]) - - result = index.replace([1, 2], "a") - tm.assert_equal(expected, result) - - -def test_index_replace_5(): +def test_index_replace_bfill(): index = pd.Index([0, 1, 2, 3, 4]) expected = pd.Index([0, 3, 3, 3, 4]) result = index.replace([1, 2], method="bfill") tm.assert_equal(expected, result) - - -def test_index_replace_6(): - index = pd.Index(["bat", "foo", "baait", "bar"]) - expected = pd.Index(["new", "foo", "baait", "new"]) - - result = index.replace(to_replace=r"^ba.$", value="new", regex=True) - tm.assert_equal(expected, result) - - -def test_index_replace_7(): - index = pd.Index(["bat", "foo", "baait", "bar"]) - expected = pd.Index(["new", "foo", "baait", "new"]) - - result = index.replace(regex=r"^ba.$", value="new") - tm.assert_equal(expected, result) - - -def test_index_replace_8(): - index = pd.Index(["bat", "foo", "baait", "bar"]) - expected = pd.Index(["new", "xyz", "baait", "new"]) - - result = index.replace(regex={r"^ba.$": "new", "foo": "xyz"}) - tm.assert_equal(expected, result) - - -if __name__ == "__main__": - # %load_ext autoreload - # %autoreload 2 - index = pd.Index([1, 2, 3]) - index.replace([1, 2], ["a", "b"]) From 0475c863135c2db82f44b423edfaba2b7730b8b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Wed, 11 Mar 2020 19:24:59 +0100 Subject: [PATCH 17/41] Raise NotImplemented for Categorical- and MultiIndex --- pandas/core/indexes/category.py | 3 +++ pandas/core/indexes/multi.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 2a79c83de7ef2..357fdc466b299 100644 --- 
a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -760,6 +760,9 @@ def _wrap_joined_index( name = get_op_result_name(self, other) return self._create_from_codes(joined, name=name) + def replace(self, *args, **kwargs): + raise NotImplementedError("Replacing in CategoricalIndex is not supported.") + CategoricalIndex._add_numeric_methods_add_sub_disabled() CategoricalIndex._add_numeric_methods_disabled() diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index af70707bd3dfc..60e1a2f07082a 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3597,6 +3597,9 @@ def isin(self, values, level=None): return np.zeros(len(levs), dtype=np.bool_) return levs.isin(values) + def replace(self, *args, **kwargs): + raise NotImplementedError("Replacing in MultiIndex is not supported.") + MultiIndex._add_numeric_methods_disabled() MultiIndex._add_numeric_methods_add_sub_disabled() From 70896a7d27aa45e233a728ef9a4d7b5ceb510295 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Fri, 19 Jun 2020 12:23:48 +0200 Subject: [PATCH 18/41] commit test code --- pandas/tests/indexes/base_class/test_replace.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/indexes/base_class/test_replace.py b/pandas/tests/indexes/base_class/test_replace.py index a71de2b54f234..fb96ef719e8e8 100644 --- a/pandas/tests/indexes/base_class/test_replace.py +++ b/pandas/tests/indexes/base_class/test_replace.py @@ -69,3 +69,11 @@ def test_index_replace_bfill(): result = index.replace([1, 2], method="bfill") tm.assert_equal(expected, result) + + +# def test_multi_index_replace(): +# levels = [[1, 2], ["one", "two"]] +# codes = [[0, 0, 1, 1], [0, 1, 0, 1]] +# names = ["foo", "bar"] + +# # multi_index = MultiIndex(levels=levels, codes=codes, names=names) From 46c47124b8165b61d29259db8633d19273c45cd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Fri, 19 Jun 2020 
18:56:24 +0200 Subject: [PATCH 19/41] reuse code & add tests --- pandas/core/indexes/base.py | 241 +----------------- pandas/core/indexes/multi.py | 13 +- .../tests/indexes/base_class/test_replace.py | 21 +- pandas/tests/indexes/multi/test_replace.py | 70 +++++ 4 files changed, 95 insertions(+), 250 deletions(-) create mode 100644 pandas/tests/indexes/multi/test_replace.py diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ffeae2b651276..5ad64ef9f0d1d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1,14 +1,13 @@ from copy import copy as copy_func from datetime import datetime import operator -import re from textwrap import dedent from typing import TYPE_CHECKING, Any, Callable, FrozenSet, Hashable, Optional, Union import warnings import numpy as np -from pandas._libs import Timedelta, algos as libalgos, index as libindex, lib +from pandas._libs import algos as libalgos, index as libindex, lib import pandas._libs.join as libjoin from pandas._libs.lib import is_datetime_array, no_default from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp @@ -33,7 +32,6 @@ is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype, - is_dict_like, is_dtype_equal, is_extension_array_dtype, is_float, @@ -64,7 +62,6 @@ ABCSeries, ABCTimedeltaIndex, ) -from pandas.core.dtypes.inference import is_re, is_re_compilable from pandas.core.dtypes.missing import array_equivalent, isna from pandas.core import ops @@ -874,7 +871,7 @@ def _format_data(self, name=None) -> str_t: if self.inferred_type == "string": is_justify = False elif self.inferred_type == "categorical": - if is_object_dtype(self.categories): # type: ignore + if is_object_dtype(self.categories): is_justify = False return format_object_summary( @@ -1355,239 +1352,15 @@ def rename(self, name, inplace=False): return self.set_names([name], inplace=inplace) def replace( - self, - to_replace=None, - value=None, - inplace=False, - limit=None, - regex=False, - 
method="pad", + self, to_replace=None, value=None, limit=None, regex=False, method="pad", ): - if inplace: - raise TypeError("Index can't be updated inplace.") - - if not is_bool(regex) and to_replace is not None: - raise AssertionError("'to_replace' must be 'None' if 'regex' is not a bool") - - if value is None: - if isinstance(to_replace, (tuple, list)): - fill_f = missing.get_fill_func(method) - mask = missing.mask_missing(self.values, to_replace) - new_index = fill_f(self.values, limit=limit, mask=mask) - new_index = new_index.astype(self.dtype) - - return self._constructor(new_index) - - if not is_dict_like(to_replace) and not is_dict_like(regex): - raise NotImplementedError( - "This is implemented in NDFrame.replace(). However," - "not clear if we should include this in the API." - "See issue 5319 and PR 5600. But also note that this" - "use is not mentioned in the docs." - ) - - if not is_dict_like(to_replace): - if not is_dict_like(regex): - raise TypeError( - "If 'to_replace' and 'value' are both None " - "and 'to_replace' is not a list, then " - "regex must be a mapping" - ) - to_replace = regex - regex = True - - items = list(to_replace.items()) - keys, values = zip(*items) if items else ([], []) - - to_replace, value = keys, values - - return self.replace( - to_replace=to_replace, - value=value, - inplace=inplace, - limit=limit, - regex=regex, - ) - - else: - if is_dict_like(to_replace): - raise TypeError("If 'to_replace' is a dict, 'value' should be None.") - if is_list_like(to_replace): - if is_list_like(value): - if len(to_replace) != len(value): - # NOTE: Corresponding error message in core.generic.replace - # is not clear. Let's decide on one. - raise ValueError( - f"Length of 'to_replace=' ({len(to_replace)}) should " - f"match length of 'value=' ({len(value)})." 
- ) - - new_index = self.replace_list( - src_list=to_replace, dest_list=value, regex=regex, - ) - - else: - mask = missing.mask_missing(self.values, to_replace) - new_index = self.putmask(mask, value) - elif to_replace is None: - if is_re_compilable(regex): - to_replace = regex - regex = True - new_index = self.replace( - to_replace=to_replace, - value=value, - inplace=inplace, - limit=limit, - regex=regex, - method=method, - ) - else: - new_index = self._replace_single( - to_replace=to_replace, value=value, regex=regex, - ) - - return new_index - - def replace_list(self, src_list, dest_list, regex=False): - """ - Replace elements of the index that are found in the `src_list` with the - elements in `dest_list`. - - Parameters - ---------- - src_list : list - List of elements to be replaced. - dest_list : list - List of elements to be replaced. - regex : bool, default False - If true, perform regular expression substitution. - - Returns - ------- - Index - The same type as the caller. - """ - - def comp(s, regex=False): - """ - Generate a bool array by perform an equality check, - or perform an element-wise regular expression - matching. 
- """ - - from pandas.core.internals.managers import ( - _compare_or_regex_search, - maybe_convert_objects, - ) - - if isna(s): - return isna(self.values) - if isinstance(s, (Timedelta, Timestamp)) and getattr(s, "tz", None) is None: - - return _compare_or_regex_search( - maybe_convert_objects(self.values), s.asm8, regex - ) - return _compare_or_regex_search(self.values, s, regex) - - masks = [comp(s, regex) for s in src_list] - - new_index = self.copy() - zipped = zip(src_list, dest_list) - for i, (_, dest) in enumerate(zipped): - m = masks[i] - if m.any(): - new_index = new_index.putmask(mask=m, value=dest) + new_index = self.to_series().replace( + to_replace=to_replace, value=value, limit=limit, regex=regex, method=method + ) + new_index = Index(new_index) return new_index - def _replace_single( - self, to_replace, value, regex=False, convert=True, mask=None, - ): - """ - Replace elements by the given value. - - Parameters - ---------- - to_replace : object or pattern - Scalar to replace or regular expression to match. - value : object - Replacement object. - regex : bool, default False - If true, perform regular expression substitution. - convert : bool, default True - If true, try to coerce any object types to better types. - mask : array-like of bool, optional - True indicate corresponding element is ignored. 
- - Returns - ------- - a new block, the result after replacing - """ - - # to_replace is regex compilable - to_rep_re = regex and is_re_compilable(to_replace) - - # regex is regex compilable - regex_re = is_re_compilable(regex) - - # only one will survive - if to_rep_re and regex_re: - raise AssertionError( - "only one of to_replace and regex can be regex compilable" - ) - - # if regex was passed as something that can be a regex (rather than a - # boolean) - if regex_re: - to_replace = regex - - regex = regex_re or to_rep_re - - # try to get the pattern attribute (compiled re) or it's a string - if is_re(to_replace): - pattern = to_replace.pattern - else: - pattern = to_replace - - # if the pattern is not empty and to_replace is either a string or a - # regex - if regex and pattern: - rx = re.compile(to_replace) - else: - # if the thing to replace is not a string or compiled regex call - # the superclass method -> to_replace is some kind of object - return self.replace(to_replace=[to_replace], value=[value], regex=regex,) - - new_values = self.values.copy() - - # deal with replacing values with objects (strings) that match but - # whose replacement is not a string (numeric, nan, object) - if isna(value) or not isinstance(value, str): - - def re_replacer(s): - if is_re(rx) and isinstance(s, str): - return value if rx.search(s) is not None else s - else: - return s - - else: - # value is guaranteed to be a string here, s can be either a string - # or null if it's null it gets returned - def re_replacer(s): - if is_re(rx) and isinstance(s, str): - return rx.sub(value, s) - else: - return s - - f = np.vectorize(re_replacer, otypes=[self.dtype]) - - if mask is None: - new_values = f(new_values) - else: - new_values[mask] = f(new_values[mask]) - - return self._constructor(new_values) - # -------------------------------------------------------------------- # Level-Centric Methods diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 
60e1a2f07082a..1e08e8d8db452 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3597,8 +3597,17 @@ def isin(self, values, level=None): return np.zeros(len(levs), dtype=np.bool_) return levs.isin(values) - def replace(self, *args, **kwargs): - raise NotImplementedError("Replacing in MultiIndex is not supported.") + def replace( + self, to_replace=None, value=None, limit=None, regex=False, method="pad" + ): + names = self.names + + result = self.to_frame().replace( + to_replace=to_replace, value=value, limit=limit, regex=regex, method=method + ) + new_multi_index = self.from_frame(result, names=names) + + return new_multi_index MultiIndex._add_numeric_methods_disabled() diff --git a/pandas/tests/indexes/base_class/test_replace.py b/pandas/tests/indexes/base_class/test_replace.py index fb96ef719e8e8..e555bce9ebd63 100644 --- a/pandas/tests/indexes/base_class/test_replace.py +++ b/pandas/tests/indexes/base_class/test_replace.py @@ -1,4 +1,3 @@ -import numpy as np import pytest import pandas as pd @@ -52,17 +51,11 @@ def test_index_replace_regex(index, to_replace, value, regex, expected): def test_index_replace_dict_and_value(): index = pd.Index([1, 2, 3]) - msg = "If 'to_replace' is a dict, 'value' should be None." 
- with pytest.raises(TypeError, match=msg): + msg = "Series.replace cannot use dict-like to_replace and non-None value" + with pytest.raises(ValueError, match=msg): index.replace({1: "a", 3: "c"}, "x") -def test_index_replace_scalar_only(): - index = pd.Index([1, None, 2]) - with pytest.raises(NotImplementedError): - index.replace(np.nan) - - def test_index_replace_bfill(): index = pd.Index([0, 1, 2, 3, 4]) expected = pd.Index([0, 3, 3, 3, 4]) @@ -71,9 +64,9 @@ def test_index_replace_bfill(): tm.assert_equal(expected, result) -# def test_multi_index_replace(): -# levels = [[1, 2], ["one", "two"]] -# codes = [[0, 0, 1, 1], [0, 1, 0, 1]] -# names = ["foo", "bar"] +def test_index_name_preserved(): + index = pd.Index(range(2), name="foo") + expected = pd.Index([0, 0], name="foo") -# # multi_index = MultiIndex(levels=levels, codes=codes, names=names) + result = index.replace(1, 0) + tm.assert_equal(expected, result) diff --git a/pandas/tests/indexes/multi/test_replace.py b/pandas/tests/indexes/multi/test_replace.py new file mode 100644 index 0000000000000..3b099680b7d30 --- /dev/null +++ b/pandas/tests/indexes/multi/test_replace.py @@ -0,0 +1,70 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "names, arrays, to_replace, value, expected_arrays", + [ + ( + [None, None], + [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], + [1, "red"], + [0, "black"], + [[0, 0, 2, 2], ["black", "blue", "black", "blue"]], + ), + # names should be preserved + ( + ["digits", "colors"], + [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], + 1, + 0, + [[0, 0, 2, 2], ["red", "blue", "red", "blue"]], + ), + ( + [None, None], + [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], + 1, + 0, + [[0, 0, 2, 2], ["red", "blue", "red", "blue"]], + ), + ( + [None, None], + [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], + [1, 2], + 0, + [[0, 0, 0, 0], ["red", "blue", "red", "blue"]], + ), + ( + [None, None], + [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], + 
[1, 2], + 0, + [[0, 0, 0, 0], ["red", "blue", "red", "blue"]], + ), + # nested dicts + ( + ["digits", "colors"], + [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], + {"digits": {1: 0}, "colors": {"red": "black"}}, + None, + [[0, 0, 2, 2], ["black", "blue", "black", "blue"]], + ), + # dicts and value + ( + ["digits", "colors"], + [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], + {"digits": [1], "colors": ["red", "blue"]}, + "x", + [["x", "x", 2, 2], ["x", "x", "x", "x"]], + ), + ], +) +def test_multi_index_replace(names, arrays, to_replace, value, expected_arrays): + multi_index = pd.MultiIndex.from_arrays(arrays, names=names) + expected = pd.MultiIndex.from_arrays(expected_arrays, names=names) + + result = multi_index.replace(to_replace=to_replace, value=value) + + tm.assert_equal(result, expected) From 786208b32f78a87b9edd7cc334baa1ad1e46ae69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sat, 20 Jun 2020 15:20:01 +0200 Subject: [PATCH 20/41] Again, add type ignore --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5ad64ef9f0d1d..1f108d4fde8c4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -870,7 +870,7 @@ def _format_data(self, name=None) -> str_t: if self.inferred_type == "string": is_justify = False - elif self.inferred_type == "categorical": + elif self.inferred_type == "categorical": # type: ignore if is_object_dtype(self.categories): is_justify = False From 6dc5f8bac945474f057f198ab0062903cf6ef03f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sat, 20 Jun 2020 15:41:32 +0200 Subject: [PATCH 21/41] Move type ignore to correct line... 
--- pandas/core/indexes/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1f108d4fde8c4..852492d11b608 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -870,8 +870,8 @@ def _format_data(self, name=None) -> str_t: if self.inferred_type == "string": is_justify = False - elif self.inferred_type == "categorical": # type: ignore - if is_object_dtype(self.categories): + elif self.inferred_type == "categorical": + if is_object_dtype(self.categories): # type: ignore is_justify = False return format_object_summary( From 1512edf6623b2a6e338c81522070a9fe403e6f88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sat, 18 Jul 2020 20:18:59 +0200 Subject: [PATCH 22/41] add docstrings --- pandas/core/common.py | 302 +++++++++++++++++++++++++++++++++- pandas/core/frame.py | 7 +- pandas/core/generic.py | 307 +---------------------------------- pandas/core/indexes/base.py | 6 + pandas/core/indexes/multi.py | 8 +- pandas/core/series.py | 7 +- 6 files changed, 333 insertions(+), 304 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index af24f8d707abd..b9330e65cbf37 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -8,7 +8,7 @@ from datetime import datetime, timedelta from functools import partial import inspect -from typing import Any, Collection, Iterable, List, Union +from typing import Any, Collection, Dict, Iterable, List, Union import warnings import numpy as np @@ -33,6 +33,306 @@ from pandas.core.dtypes.inference import _iterable_not_string from pandas.core.dtypes.missing import isna, isnull, notnull # noqa +_core_shared_docs: Dict[str, str] = dict( + to_replace=""" + Replace values given in `to_replace` with `value`. + + Values of the {klass} are replaced with other values dynamically. 
+ {replace_iloc} + + Parameters + ---------- + to_replace : str, regex, list, dict, Series, int, float, or None + How to find the values that will be replaced. + + * numeric, str or regex: + + - numeric: numeric values equal to `to_replace` will be + replaced with `value` + - str: string exactly matching `to_replace` will be replaced + with `value` + - regex: regexs matching `to_replace` will be replaced with + `value` + + * list of str, regex, or numeric: + + - First, if `to_replace` and `value` are both lists, they + **must** be the same length. + - Second, if ``regex=True`` then all of the strings in **both** + lists will be interpreted as regexs otherwise they will match + directly. This doesn't matter much for `value` since there + are only a few possible substitution regexes you can use. + - str, regex and numeric rules apply as above. + + * dict: + + - Dicts can be used to specify different replacement values + for different existing values. For example, + ``{{'a': 'b', 'y': 'z'}}`` replaces the value 'a' with 'b' and + 'y' with 'z'. To use a dict in this way the `value` + parameter should be `None`. + - For a DataFrame a dict can specify that different values + should be replaced in different columns. For example, + ``{{'a': 1, 'b': 'z'}}`` looks for the value 1 in column 'a' + and the value 'z' in column 'b' and replaces these values + with whatever is specified in `value`. The `value` parameter + should not be ``None`` in this case. You can treat this as a + special case of passing two lists except that you are + specifying the column to search in. + - For a DataFrame nested dictionaries, e.g., + ``{{'a': {{'b': np.nan}}}}``, are read as follows: look in column + 'a' for the value 'b' and replace it with NaN. The `value` + parameter should be ``None`` to use a nested dict in this + way. You can nest regular expressions as well. Note that + column names (the top-level dictionary keys in a nested + dictionary) **cannot** be regular expressions. 
+ + * None: + + - This means that the `regex` argument must be a string, + compiled regular expression, or list, dict, ndarray or + Series of such elements. If `value` is also ``None`` then + this **must** be a nested dictionary or Series. + + See the examples section for examples of each of these. + value : scalar, dict, list, str, regex, default None + Value to replace any values matching `to_replace` with. + For a DataFrame a dict of values can be used to specify which + value to use for each column (columns not in the dict will not be + filled). Regular expressions, strings and lists or dicts of such + objects are also allowed. + {inplace} + limit : int, default None + Maximum size gap to forward or backward fill. + regex : bool or same types as `to_replace`, default False + Whether to interpret `to_replace` and/or `value` as regular + expressions. If this is ``True`` then `to_replace` *must* be a + string. Alternatively, this could be a regular expression or a + list, dict, or array of regular expressions in which case + `to_replace` must be ``None``. + method : {{'pad', 'ffill', 'bfill', `None`}} + The method to use when for replacement, when `to_replace` is a + scalar, list or tuple and `value` is ``None``. + + .. versionchanged:: 0.23.0 + Added to DataFrame. + + Returns + ------- + {klass} + Object after replacement. + + Raises + ------ + AssertionError + * If `regex` is not a ``bool`` and `to_replace` is not + ``None``. + + TypeError + * If `to_replace` is not a scalar, array-like, ``dict``, or ``None`` + * If `to_replace` is a ``dict`` and `value` is not a ``list``, + ``dict``, ``ndarray``, or ``Series`` + * If `to_replace` is ``None`` and `regex` is not compilable + into a regular expression or is a list, dict, ndarray, or + Series. 
+ * When replacing multiple ``bool`` or ``datetime64`` objects and + the arguments to `to_replace` does not match the type of the + value being replaced + + ValueError + * If a ``list`` or an ``ndarray`` is passed to `to_replace` and + `value` but they are not the same length. + + See Also + -------- + {klass}.fillna : Fill NA values. + {klass}.where : Replace values based on boolean condition. + Series.str.replace : Simple string replacement. + + Notes + ----- + * Regex substitution is performed under the hood with ``re.sub``. The + rules for substitution for ``re.sub`` are the same. + * Regular expressions will only substitute on strings, meaning you + cannot provide, for example, a regular expression matching floating + point numbers and expect the columns in your frame that have a + numeric dtype to be matched. However, if those floating point + numbers *are* strings, then you can do this. + * This method has *a lot* of options. You are encouraged to experiment + and play with this method to gain intuition about how it works. + * When dict is used as the `to_replace` value, it is like + key(s) in the dict are the to_replace part and + value(s) in the dict are the value parameter. + + Examples + -------- + + **Scalar `to_replace` and `value`** + + >>> s = pd.Series([0, 1, 2, 3, 4]) + >>> s.replace(0, 5) + 0 5 + 1 1 + 2 2 + 3 3 + 4 4 + dtype: int64 + + >>> df = pd.DataFrame({{'A': [0, 1, 2, 3, 4], + ... 'B': [5, 6, 7, 8, 9], + ... 
'C': ['a', 'b', 'c', 'd', 'e']}}) + >>> df.replace(0, 5) + A B C + 0 5 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + **List-like `to_replace`** + + >>> df.replace([0, 1, 2, 3], 4) + A B C + 0 4 5 a + 1 4 6 b + 2 4 7 c + 3 4 8 d + 4 4 9 e + + >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) + A B C + 0 4 5 a + 1 3 6 b + 2 2 7 c + 3 1 8 d + 4 4 9 e + + >>> s.replace([1, 2], method='bfill') + 0 0 + 1 3 + 2 3 + 3 3 + 4 4 + dtype: int64 + + **dict-like `to_replace`** + + >>> df.replace({{0: 10, 1: 100}}) + A B C + 0 10 5 a + 1 100 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace({{'A': 0, 'B': 5}}, 100) + A B C + 0 100 100 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace({{'A': {{0: 100, 4: 400}}}}) + A B C + 0 100 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 400 9 e + + **Regular expression `to_replace`** + + >>> df = pd.DataFrame({{'A': ['bat', 'foo', 'bait'], + ... 'B': ['abc', 'bar', 'xyz']}}) + >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) + A B + 0 new abc + 1 foo new + 2 bait xyz + + >>> df.replace({{'A': r'^ba.$'}}, {{'A': 'new'}}, regex=True) + A B + 0 new abc + 1 foo bar + 2 bait xyz + + >>> df.replace(regex=r'^ba.$', value='new') + A B + 0 new abc + 1 foo new + 2 bait xyz + + >>> df.replace(regex={{r'^ba.$': 'new', 'foo': 'xyz'}}) + A B + 0 new abc + 1 xyz new + 2 bait xyz + + >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') + A B + 0 new abc + 1 new new + 2 bait xyz + + Note that when replacing multiple ``bool`` or ``datetime64`` objects, + the data types in the `to_replace` parameter must match the data + type of the value being replaced: + + >>> df = pd.DataFrame({{'A': [True, False, True], + ... 'B': [False, True, False]}}) + >>> df.replace({{'a string': 'new value', True: False}}) # raises + Traceback (most recent call last): + ... + TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' + + This raises a ``TypeError`` because one of the ``dict`` keys is not of + the correct type for replacement. 
+ + Compare the behavior of ``s.replace({{'a': None}})`` and + ``s.replace('a', None)`` to understand the peculiarities + of the `to_replace` parameter: + + >>> s = pd.Series([10, 'a', 'a', 'b', 'a']) + + When one uses a dict as the `to_replace` value, it is like the + value(s) in the dict are equal to the `value` parameter. + ``s.replace({{'a': None}})`` is equivalent to + ``s.replace(to_replace={{'a': None}}, value=None, method=None)``: + + >>> s.replace({{'a': None}}) + 0 10 + 1 None + 2 None + 3 b + 4 None + dtype: object + + When ``value=None`` and `to_replace` is a scalar, list or + tuple, `replace` uses the method parameter (default 'pad') to do the + replacement. So this is why the 'a' values are being replaced by 10 + in rows 1 and 2 and 'b' in row 4 in this case. + The command ``s.replace('a', None)`` is actually equivalent to + ``s.replace(to_replace='a', value=None, method='pad')``: + + >>> s.replace('a', None) + 0 10 + 1 10 + 2 10 + 3 b + 4 b + dtype: object +""" +) +_core_shared_docs_kwargs: Dict[str, str] = dict( + inplace=""" + inplace : boolean, default False + If True, performs operation inplace and returns None.""", + replace_iloc=""" + This differs from updating with ``.loc`` or ``.iloc``, which require + you to specify a location to update with some value.""", +) + class SettingWithCopyError(ValueError): pass diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 39ca7ed47f7fa..5a3caeef6df2b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4263,7 +4263,12 @@ def fillna( downcast=downcast, ) - @doc(NDFrame.replace, **_shared_doc_kwargs) + @doc( + NDFrame.replace, + inplace=com._core_shared_docs_kwargs["inplace"], + replace_iloc=com._core_shared_docs_kwargs["replace_iloc"], + **_shared_doc_kwargs, + ) def replace( self, to_replace=None, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 701909c9df857..afff4a8664e5f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6129,309 +6129,16 @@ def 
bfill( backfill = bfill - @doc(klass=_shared_doc_kwargs["klass"]) + @doc( + com._core_shared_docs["to_replace"], + klass=_shared_doc_kwargs["klass"], + inplace=com._core_shared_docs_kwargs["inplace"], + replace_iloc=com._core_shared_docs_kwargs["replace_iloc"], + ) def replace( - self, - to_replace=None, - value=None, - inplace=False, - limit=None, - regex=False, - method="pad", + self, to_replace, value, inplace, limit, regex, method, ): - """ - Replace values given in `to_replace` with `value`. - - Values of the {klass} are replaced with other values dynamically. - This differs from updating with ``.loc`` or ``.iloc``, which require - you to specify a location to update with some value. - - Parameters - ---------- - to_replace : str, regex, list, dict, Series, int, float, or None - How to find the values that will be replaced. - - * numeric, str or regex: - - - numeric: numeric values equal to `to_replace` will be - replaced with `value` - - str: string exactly matching `to_replace` will be replaced - with `value` - - regex: regexs matching `to_replace` will be replaced with - `value` - - * list of str, regex, or numeric: - - - First, if `to_replace` and `value` are both lists, they - **must** be the same length. - - Second, if ``regex=True`` then all of the strings in **both** - lists will be interpreted as regexs otherwise they will match - directly. This doesn't matter much for `value` since there - are only a few possible substitution regexes you can use. - - str, regex and numeric rules apply as above. - - * dict: - - - Dicts can be used to specify different replacement values - for different existing values. For example, - ``{{'a': 'b', 'y': 'z'}}`` replaces the value 'a' with 'b' and - 'y' with 'z'. To use a dict in this way the `value` - parameter should be `None`. - - For a DataFrame a dict can specify that different values - should be replaced in different columns. 
For example, - ``{{'a': 1, 'b': 'z'}}`` looks for the value 1 in column 'a' - and the value 'z' in column 'b' and replaces these values - with whatever is specified in `value`. The `value` parameter - should not be ``None`` in this case. You can treat this as a - special case of passing two lists except that you are - specifying the column to search in. - - For a DataFrame nested dictionaries, e.g., - ``{{'a': {{'b': np.nan}}}}``, are read as follows: look in column - 'a' for the value 'b' and replace it with NaN. The `value` - parameter should be ``None`` to use a nested dict in this - way. You can nest regular expressions as well. Note that - column names (the top-level dictionary keys in a nested - dictionary) **cannot** be regular expressions. - - * None: - - - This means that the `regex` argument must be a string, - compiled regular expression, or list, dict, ndarray or - Series of such elements. If `value` is also ``None`` then - this **must** be a nested dictionary or Series. - - See the examples section for examples of each of these. - value : scalar, dict, list, str, regex, default None - Value to replace any values matching `to_replace` with. - For a DataFrame a dict of values can be used to specify which - value to use for each column (columns not in the dict will not be - filled). Regular expressions, strings and lists or dicts of such - objects are also allowed. - inplace : bool, default False - If True, in place. Note: this will modify any - other views on this object (e.g. a column from a DataFrame). - Returns the caller if this is True. - limit : int, default None - Maximum size gap to forward or backward fill. - regex : bool or same types as `to_replace`, default False - Whether to interpret `to_replace` and/or `value` as regular - expressions. If this is ``True`` then `to_replace` *must* be a - string. Alternatively, this could be a regular expression or a - list, dict, or array of regular expressions in which case - `to_replace` must be ``None``. 
- method : {{'pad', 'ffill', 'bfill', `None`}} - The method to use when for replacement, when `to_replace` is a - scalar, list or tuple and `value` is ``None``. - - .. versionchanged:: 0.23.0 - Added to DataFrame. - - Returns - ------- - {klass} - Object after replacement. - - Raises - ------ - AssertionError - * If `regex` is not a ``bool`` and `to_replace` is not - ``None``. - - TypeError - * If `to_replace` is not a scalar, array-like, ``dict``, or ``None`` - * If `to_replace` is a ``dict`` and `value` is not a ``list``, - ``dict``, ``ndarray``, or ``Series`` - * If `to_replace` is ``None`` and `regex` is not compilable - into a regular expression or is a list, dict, ndarray, or - Series. - * When replacing multiple ``bool`` or ``datetime64`` objects and - the arguments to `to_replace` does not match the type of the - value being replaced - - ValueError - * If a ``list`` or an ``ndarray`` is passed to `to_replace` and - `value` but they are not the same length. - - See Also - -------- - {klass}.fillna : Fill NA values. - {klass}.where : Replace values based on boolean condition. - Series.str.replace : Simple string replacement. - - Notes - ----- - * Regex substitution is performed under the hood with ``re.sub``. The - rules for substitution for ``re.sub`` are the same. - * Regular expressions will only substitute on strings, meaning you - cannot provide, for example, a regular expression matching floating - point numbers and expect the columns in your frame that have a - numeric dtype to be matched. However, if those floating point - numbers *are* strings, then you can do this. - * This method has *a lot* of options. You are encouraged to experiment - and play with this method to gain intuition about how it works. - * When dict is used as the `to_replace` value, it is like - key(s) in the dict are the to_replace part and - value(s) in the dict are the value parameter. 
- - Examples - -------- - **Scalar `to_replace` and `value`** - - >>> s = pd.Series([0, 1, 2, 3, 4]) - >>> s.replace(0, 5) - 0 5 - 1 1 - 2 2 - 3 3 - 4 4 - dtype: int64 - - >>> df = pd.DataFrame({{'A': [0, 1, 2, 3, 4], - ... 'B': [5, 6, 7, 8, 9], - ... 'C': ['a', 'b', 'c', 'd', 'e']}}) - >>> df.replace(0, 5) - A B C - 0 5 5 a - 1 1 6 b - 2 2 7 c - 3 3 8 d - 4 4 9 e - - **List-like `to_replace`** - - >>> df.replace([0, 1, 2, 3], 4) - A B C - 0 4 5 a - 1 4 6 b - 2 4 7 c - 3 4 8 d - 4 4 9 e - - >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) - A B C - 0 4 5 a - 1 3 6 b - 2 2 7 c - 3 1 8 d - 4 4 9 e - - >>> s.replace([1, 2], method='bfill') - 0 0 - 1 3 - 2 3 - 3 3 - 4 4 - dtype: int64 - - **dict-like `to_replace`** - - >>> df.replace({{0: 10, 1: 100}}) - A B C - 0 10 5 a - 1 100 6 b - 2 2 7 c - 3 3 8 d - 4 4 9 e - - >>> df.replace({{'A': 0, 'B': 5}}, 100) - A B C - 0 100 100 a - 1 1 6 b - 2 2 7 c - 3 3 8 d - 4 4 9 e - - >>> df.replace({{'A': {{0: 100, 4: 400}}}}) - A B C - 0 100 5 a - 1 1 6 b - 2 2 7 c - 3 3 8 d - 4 400 9 e - - **Regular expression `to_replace`** - - >>> df = pd.DataFrame({{'A': ['bat', 'foo', 'bait'], - ... 'B': ['abc', 'bar', 'xyz']}}) - >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) - A B - 0 new abc - 1 foo new - 2 bait xyz - - >>> df.replace({{'A': r'^ba.$'}}, {{'A': 'new'}}, regex=True) - A B - 0 new abc - 1 foo bar - 2 bait xyz - - >>> df.replace(regex=r'^ba.$', value='new') - A B - 0 new abc - 1 foo new - 2 bait xyz - - >>> df.replace(regex={{r'^ba.$': 'new', 'foo': 'xyz'}}) - A B - 0 new abc - 1 xyz new - 2 bait xyz - - >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') - A B - 0 new abc - 1 new new - 2 bait xyz - - Note that when replacing multiple ``bool`` or ``datetime64`` objects, - the data types in the `to_replace` parameter must match the data - type of the value being replaced: - - >>> df = pd.DataFrame({{'A': [True, False, True], - ... 
'B': [False, True, False]}}) - >>> df.replace({{'a string': 'new value', True: False}}) # raises - Traceback (most recent call last): - ... - TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' - - This raises a ``TypeError`` because one of the ``dict`` keys is not of - the correct type for replacement. - - Compare the behavior of ``s.replace({{'a': None}})`` and - ``s.replace('a', None)`` to understand the peculiarities - of the `to_replace` parameter: - - >>> s = pd.Series([10, 'a', 'a', 'b', 'a']) - - When one uses a dict as the `to_replace` value, it is like the - value(s) in the dict are equal to the `value` parameter. - ``s.replace({{'a': None}})`` is equivalent to - ``s.replace(to_replace={{'a': None}}, value=None, method=None)``: - - >>> s.replace({{'a': None}}) - 0 10 - 1 None - 2 None - 3 b - 4 None - dtype: object - - When ``value=None`` and `to_replace` is a scalar, list or - tuple, `replace` uses the method parameter (default 'pad') to do the - replacement. So this is why the 'a' values are being replaced by 10 - in rows 1 and 2 and 'b' in row 4 in this case. 
- The command ``s.replace('a', None)`` is actually equivalent to - ``s.replace(to_replace='a', value=None, method='pad')``: - - >>> s.replace('a', None) - 0 10 - 1 10 - 2 10 - 3 b - 4 b - dtype: object - """ if not ( is_scalar(to_replace) or is_re_compilable(to_replace) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 852492d11b608..792c54dbda3d6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1351,6 +1351,12 @@ def rename(self, name, inplace=False): """ return self.set_names([name], inplace=inplace) + @doc( + com._core_shared_docs["to_replace"], + klass=_index_doc_kwargs["klass"], + inplace=_index_doc_kwargs["inplace"], + replace_iloc="", + ) def replace( self, to_replace=None, value=None, limit=None, regex=False, method="pad", ): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1e08e8d8db452..d5d5c0277c45c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3597,8 +3597,14 @@ def isin(self, values, level=None): return np.zeros(len(levs), dtype=np.bool_) return levs.isin(values) + @doc( + com._core_shared_docs["to_replace"], + klass=_index_doc_kwargs["klass"], + inplace=_index_doc_kwargs["inplace"], + replace_iloc="", + ) def replace( - self, to_replace=None, value=None, limit=None, regex=False, method="pad" + self, to_replace, value, limit, regex, method, ): names = self.names diff --git a/pandas/core/series.py b/pandas/core/series.py index cab8dd133b579..32a61feba642d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4492,7 +4492,12 @@ def fillna( downcast=downcast, ) - @doc(NDFrame.replace, klass=_shared_doc_kwargs["klass"]) + @doc( + NDFrame.replace, + klass=_shared_doc_kwargs["klass"], + inplace=_shared_doc_kwargs["inplace"], + replace_iloc=com._core_shared_docs_kwargs["replace_iloc"], + ) def replace( self, to_replace=None, From be1d0ac36c7d0a072cc847a4d0a8b715c042cbaf Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 19 Jul 2020 20:14:29 +0200 Subject: [PATCH 23/41] I had removed defaults by mistake, added them again --- pandas/core/generic.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 756e43cca7c22..8ee5e254a2647 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6129,7 +6129,13 @@ def bfill( replace_iloc=com._core_shared_docs_kwargs["replace_iloc"], ) def replace( - self, to_replace, value, inplace, limit, regex, method, + self, + to_replace=None, + value=None, + inplace=False, + limit=None, + regex=False, + method="pad", ): if not ( From aeb2759cacc20933d5ce0af231b3b84787130008 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 19 Jul 2020 20:40:13 +0200 Subject: [PATCH 24/41] Add defaults to multiindex as well --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 98dde1180bfda..d014838f52a14 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3613,7 +3613,7 @@ def isin(self, values, level=None): replace_iloc="", ) def replace( - self, to_replace, value, limit, regex, method, + self, to_replace=None, value=None, limit=None, regex=False, method="pad", ): names = self.names From db58359cbabadab1d19cd5bbd2f485ba522236ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Mon, 20 Jul 2020 09:50:59 +0200 Subject: [PATCH 25/41] Update whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 43d1244c15d8a..cac9b96f815bf 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -340,6 +340,7 @@ Other enhancements - :meth:`DataFrame.agg` and :meth:`Series.agg` now accept named aggregation for 
renaming the output columns/indexes. (:issue:`26513`) - ``compute.use_numba`` now exists as a configuration option that utilizes the numba engine when available (:issue:`33966`) - :meth:`Series.plot` now supports asymmetric error bars. Previously, if :meth:`Series.plot` received a "2xN" array with error values for `yerr` and/or `xerr`, the left/lower values (first row) were mirrored, while the right/upper values (second row) were ignored. Now, the first row represents the left/lower error values and the second row the right/upper error values. (:issue:`9536`) +- :class:`Index` and :class:`MultiIndex` now have a ``replace()`` method (:issue:`19495`). .. --------------------------------------------------------------------------- From 097011269b0d11a833c40e7878c706fc7c5596c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Mon, 27 Jul 2020 18:20:45 +0200 Subject: [PATCH 26/41] Move shared docs to common.shared_docs --- pandas/core/common.py | 302 +------------------------------------ pandas/core/frame.py | 5 +- pandas/core/generic.py | 26 ++-- pandas/core/series.py | 33 ++-- pandas/core/shared_docs.py | 301 ++++++++++++++++++++++++++++++++++++ 5 files changed, 336 insertions(+), 331 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 60621d51f2203..e7260a9923ee0 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -9,7 +9,7 @@ from datetime import datetime, timedelta from functools import partial import inspect -from typing import Any, Collection, Dict, Iterable, Iterator, List, Union +from typing import Any, Collection, Iterable, Iterator, List, Union import warnings import numpy as np @@ -34,306 +34,6 @@ from pandas.core.dtypes.inference import _iterable_not_string from pandas.core.dtypes.missing import isna, isnull, notnull # noqa -_core_shared_docs: Dict[str, str] = dict( - to_replace=""" - Replace values given in `to_replace` with `value`.
- - Values of the {klass} are replaced with other values dynamically. - {replace_iloc} - - Parameters - ---------- - to_replace : str, regex, list, dict, Series, int, float, or None - How to find the values that will be replaced. - - * numeric, str or regex: - - - numeric: numeric values equal to `to_replace` will be - replaced with `value` - - str: string exactly matching `to_replace` will be replaced - with `value` - - regex: regexs matching `to_replace` will be replaced with - `value` - - * list of str, regex, or numeric: - - - First, if `to_replace` and `value` are both lists, they - **must** be the same length. - - Second, if ``regex=True`` then all of the strings in **both** - lists will be interpreted as regexs otherwise they will match - directly. This doesn't matter much for `value` since there - are only a few possible substitution regexes you can use. - - str, regex and numeric rules apply as above. - - * dict: - - - Dicts can be used to specify different replacement values - for different existing values. For example, - ``{{'a': 'b', 'y': 'z'}}`` replaces the value 'a' with 'b' and - 'y' with 'z'. To use a dict in this way the `value` - parameter should be `None`. - - For a DataFrame a dict can specify that different values - should be replaced in different columns. For example, - ``{{'a': 1, 'b': 'z'}}`` looks for the value 1 in column 'a' - and the value 'z' in column 'b' and replaces these values - with whatever is specified in `value`. The `value` parameter - should not be ``None`` in this case. You can treat this as a - special case of passing two lists except that you are - specifying the column to search in. - - For a DataFrame nested dictionaries, e.g., - ``{{'a': {{'b': np.nan}}}}``, are read as follows: look in column - 'a' for the value 'b' and replace it with NaN. The `value` - parameter should be ``None`` to use a nested dict in this - way. You can nest regular expressions as well. 
Note that - column names (the top-level dictionary keys in a nested - dictionary) **cannot** be regular expressions. - - * None: - - - This means that the `regex` argument must be a string, - compiled regular expression, or list, dict, ndarray or - Series of such elements. If `value` is also ``None`` then - this **must** be a nested dictionary or Series. - - See the examples section for examples of each of these. - value : scalar, dict, list, str, regex, default None - Value to replace any values matching `to_replace` with. - For a DataFrame a dict of values can be used to specify which - value to use for each column (columns not in the dict will not be - filled). Regular expressions, strings and lists or dicts of such - objects are also allowed. - {inplace} - limit : int, default None - Maximum size gap to forward or backward fill. - regex : bool or same types as `to_replace`, default False - Whether to interpret `to_replace` and/or `value` as regular - expressions. If this is ``True`` then `to_replace` *must* be a - string. Alternatively, this could be a regular expression or a - list, dict, or array of regular expressions in which case - `to_replace` must be ``None``. - method : {{'pad', 'ffill', 'bfill', `None`}} - The method to use when for replacement, when `to_replace` is a - scalar, list or tuple and `value` is ``None``. - - .. versionchanged:: 0.23.0 - Added to DataFrame. - - Returns - ------- - {klass} - Object after replacement. - - Raises - ------ - AssertionError - * If `regex` is not a ``bool`` and `to_replace` is not - ``None``. - - TypeError - * If `to_replace` is not a scalar, array-like, ``dict``, or ``None`` - * If `to_replace` is a ``dict`` and `value` is not a ``list``, - ``dict``, ``ndarray``, or ``Series`` - * If `to_replace` is ``None`` and `regex` is not compilable - into a regular expression or is a list, dict, ndarray, or - Series. 
- * When replacing multiple ``bool`` or ``datetime64`` objects and - the arguments to `to_replace` does not match the type of the - value being replaced - - ValueError - * If a ``list`` or an ``ndarray`` is passed to `to_replace` and - `value` but they are not the same length. - - See Also - -------- - {klass}.fillna : Fill NA values. - {klass}.where : Replace values based on boolean condition. - Series.str.replace : Simple string replacement. - - Notes - ----- - * Regex substitution is performed under the hood with ``re.sub``. The - rules for substitution for ``re.sub`` are the same. - * Regular expressions will only substitute on strings, meaning you - cannot provide, for example, a regular expression matching floating - point numbers and expect the columns in your frame that have a - numeric dtype to be matched. However, if those floating point - numbers *are* strings, then you can do this. - * This method has *a lot* of options. You are encouraged to experiment - and play with this method to gain intuition about how it works. - * When dict is used as the `to_replace` value, it is like - key(s) in the dict are the to_replace part and - value(s) in the dict are the value parameter. - - Examples - -------- - - **Scalar `to_replace` and `value`** - - >>> s = pd.Series([0, 1, 2, 3, 4]) - >>> s.replace(0, 5) - 0 5 - 1 1 - 2 2 - 3 3 - 4 4 - dtype: int64 - - >>> df = pd.DataFrame({{'A': [0, 1, 2, 3, 4], - ... 'B': [5, 6, 7, 8, 9], - ... 
'C': ['a', 'b', 'c', 'd', 'e']}}) - >>> df.replace(0, 5) - A B C - 0 5 5 a - 1 1 6 b - 2 2 7 c - 3 3 8 d - 4 4 9 e - - **List-like `to_replace`** - - >>> df.replace([0, 1, 2, 3], 4) - A B C - 0 4 5 a - 1 4 6 b - 2 4 7 c - 3 4 8 d - 4 4 9 e - - >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) - A B C - 0 4 5 a - 1 3 6 b - 2 2 7 c - 3 1 8 d - 4 4 9 e - - >>> s.replace([1, 2], method='bfill') - 0 0 - 1 3 - 2 3 - 3 3 - 4 4 - dtype: int64 - - **dict-like `to_replace`** - - >>> df.replace({{0: 10, 1: 100}}) - A B C - 0 10 5 a - 1 100 6 b - 2 2 7 c - 3 3 8 d - 4 4 9 e - - >>> df.replace({{'A': 0, 'B': 5}}, 100) - A B C - 0 100 100 a - 1 1 6 b - 2 2 7 c - 3 3 8 d - 4 4 9 e - - >>> df.replace({{'A': {{0: 100, 4: 400}}}}) - A B C - 0 100 5 a - 1 1 6 b - 2 2 7 c - 3 3 8 d - 4 400 9 e - - **Regular expression `to_replace`** - - >>> df = pd.DataFrame({{'A': ['bat', 'foo', 'bait'], - ... 'B': ['abc', 'bar', 'xyz']}}) - >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) - A B - 0 new abc - 1 foo new - 2 bait xyz - - >>> df.replace({{'A': r'^ba.$'}}, {{'A': 'new'}}, regex=True) - A B - 0 new abc - 1 foo bar - 2 bait xyz - - >>> df.replace(regex=r'^ba.$', value='new') - A B - 0 new abc - 1 foo new - 2 bait xyz - - >>> df.replace(regex={{r'^ba.$': 'new', 'foo': 'xyz'}}) - A B - 0 new abc - 1 xyz new - 2 bait xyz - - >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') - A B - 0 new abc - 1 new new - 2 bait xyz - - Note that when replacing multiple ``bool`` or ``datetime64`` objects, - the data types in the `to_replace` parameter must match the data - type of the value being replaced: - - >>> df = pd.DataFrame({{'A': [True, False, True], - ... 'B': [False, True, False]}}) - >>> df.replace({{'a string': 'new value', True: False}}) # raises - Traceback (most recent call last): - ... - TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' - - This raises a ``TypeError`` because one of the ``dict`` keys is not of - the correct type for replacement. 
- - Compare the behavior of ``s.replace({{'a': None}})`` and - ``s.replace('a', None)`` to understand the peculiarities - of the `to_replace` parameter: - - >>> s = pd.Series([10, 'a', 'a', 'b', 'a']) - - When one uses a dict as the `to_replace` value, it is like the - value(s) in the dict are equal to the `value` parameter. - ``s.replace({{'a': None}})`` is equivalent to - ``s.replace(to_replace={{'a': None}}, value=None, method=None)``: - - >>> s.replace({{'a': None}}) - 0 10 - 1 None - 2 None - 3 b - 4 None - dtype: object - - When ``value=None`` and `to_replace` is a scalar, list or - tuple, `replace` uses the method parameter (default 'pad') to do the - replacement. So this is why the 'a' values are being replaced by 10 - in rows 1 and 2 and 'b' in row 4 in this case. - The command ``s.replace('a', None)`` is actually equivalent to - ``s.replace(to_replace='a', value=None, method='pad')``: - - >>> s.replace('a', None) - 0 10 - 1 10 - 2 10 - 3 b - 4 b - dtype: object -""" -) -_core_shared_docs_kwargs: Dict[str, str] = dict( - inplace=""" - inplace : boolean, default False - If True, performs operation inplace and returns None.""", - replace_iloc=""" - This differs from updating with ``.loc`` or ``.iloc``, which require - you to specify a location to update with some value.""", -) - class SettingWithCopyError(ValueError): pass diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 78082a3601179..e0be44a62cc9f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -141,6 +141,7 @@ ) from pandas.core.reshape.melt import melt from pandas.core.series import Series +from pandas.core.shared_docs import _shared_doc_kwargs as core_shared_doc_kwargs from pandas.core.sorting import ensure_key_mapped from pandas.io.common import get_filepath_or_buffer @@ -4362,8 +4363,8 @@ def pop(self, item: Label) -> Series: @doc( NDFrame.replace, - inplace=com._core_shared_docs_kwargs["inplace"], - replace_iloc=com._core_shared_docs_kwargs["replace_iloc"], + 
inplace=core_shared_doc_kwargs["inplace"], + replace_iloc=core_shared_doc_kwargs["replace_iloc"], **_shared_doc_kwargs, ) def replace( diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8ee5e254a2647..3f64b16e6a01a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -97,7 +97,7 @@ from pandas.core.internals import BlockManager from pandas.core.missing import find_valid_index from pandas.core.ops import _align_method_FRAME -from pandas.core.shared_docs import _shared_docs +from pandas.core.shared_docs import _shared_doc_kwargs, _shared_docs from pandas.io.formats import format as fmt from pandas.io.formats.format import DataFrameFormatter, format_percentiles @@ -109,14 +109,16 @@ # goal is to be able to define the docs close to function, while still being # able to share -_shared_doc_kwargs = dict( - axes="keywords for axes", - klass="Series/DataFrame", - axes_single_arg="int or labels for object", - args_transpose="axes to permute (int or label for object)", - optional_by=""" - by : str or list of str - Name or list of names to sort by""", +_shared_doc_kwargs.update( + dict( + axes="keywords for axes", + klass="Series/DataFrame", + axes_single_arg="int or labels for object", + args_transpose="axes to permute (int or label for object)", + optional_by=""" + by : str or list of str + Name or list of names to sort by""", + ) ) @@ -6123,10 +6125,10 @@ def bfill( backfill = bfill @doc( - com._core_shared_docs["to_replace"], + _shared_docs["to_replace"], klass=_shared_doc_kwargs["klass"], - inplace=com._core_shared_docs_kwargs["inplace"], - replace_iloc=com._core_shared_docs_kwargs["replace_iloc"], + inplace=_shared_doc_kwargs["inplace"], + replace_iloc=_shared_doc_kwargs["replace_iloc"], ) def replace( self, diff --git a/pandas/core/series.py b/pandas/core/series.py index 1d8663dc86adc..b5fb0cf9628a3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -87,6 +87,7 @@ from pandas.core.indexes.timedeltas import TimedeltaIndex 
from pandas.core.indexing import check_bool_indexer from pandas.core.internals import SingleBlockManager +from pandas.core.shared_docs import _shared_doc_kwargs from pandas.core.sorting import ensure_key_mapped from pandas.core.strings import StringMethods from pandas.core.tools.datetimes import to_datetime @@ -100,21 +101,21 @@ __all__ = ["Series"] -_shared_doc_kwargs = dict( - axes="index", - klass="Series", - axes_single_arg="{0 or 'index'}", - axis="""axis : {0 or 'index'} - Parameter needed for compatibility with DataFrame.""", - inplace="""inplace : boolean, default False - If True, performs operation inplace and returns None.""", - unique="np.ndarray", - duplicated="Series", - optional_by="", - optional_mapper="", - optional_labels="", - optional_axis="", - versionadded_to_excel="\n .. versionadded:: 0.20.0\n", +_shared_doc_kwargs.update( + dict( + axes="index", + klass="Series", + axes_single_arg="{0 or 'index'}", + axis="""axis : {0 or 'index'} + Parameter needed for compatibility with DataFrame.""", + unique="np.ndarray", + duplicated="Series", + optional_by="", + optional_mapper="", + optional_labels="", + optional_axis="", + versionadded_to_excel="\n .. versionadded:: 0.20.0\n", + ) ) @@ -4554,7 +4555,7 @@ def pop(self, item: Label) -> Any: NDFrame.replace, klass=_shared_doc_kwargs["klass"], inplace=_shared_doc_kwargs["inplace"], - replace_iloc=com._core_shared_docs_kwargs["replace_iloc"], + replace_iloc=_shared_doc_kwargs["replace_iloc"], ) def replace( self, diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index b81942f062b19..a58fd7281ff5f 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -116,3 +116,304 @@ 1 b B E 3 2 c B E 5 """ + +_shared_docs[ + "to_replace" +] = """ + Replace values given in `to_replace` with `value`. + + Values of the {klass} are replaced with other values dynamically. 
+ {replace_iloc} + + Parameters + ---------- + to_replace : str, regex, list, dict, Series, int, float, or None + How to find the values that will be replaced. + + * numeric, str or regex: + + - numeric: numeric values equal to `to_replace` will be + replaced with `value` + - str: string exactly matching `to_replace` will be replaced + with `value` + - regex: regexs matching `to_replace` will be replaced with + `value` + + * list of str, regex, or numeric: + + - First, if `to_replace` and `value` are both lists, they + **must** be the same length. + - Second, if ``regex=True`` then all of the strings in **both** + lists will be interpreted as regexs otherwise they will match + directly. This doesn't matter much for `value` since there + are only a few possible substitution regexes you can use. + - str, regex and numeric rules apply as above. + + * dict: + + - Dicts can be used to specify different replacement values + for different existing values. For example, + ``{{'a': 'b', 'y': 'z'}}`` replaces the value 'a' with 'b' and + 'y' with 'z'. To use a dict in this way the `value` + parameter should be `None`. + - For a DataFrame a dict can specify that different values + should be replaced in different columns. For example, + ``{{'a': 1, 'b': 'z'}}`` looks for the value 1 in column 'a' + and the value 'z' in column 'b' and replaces these values + with whatever is specified in `value`. The `value` parameter + should not be ``None`` in this case. You can treat this as a + special case of passing two lists except that you are + specifying the column to search in. + - For a DataFrame nested dictionaries, e.g., + ``{{'a': {{'b': np.nan}}}}``, are read as follows: look in column + 'a' for the value 'b' and replace it with NaN. The `value` + parameter should be ``None`` to use a nested dict in this + way. You can nest regular expressions as well. Note that + column names (the top-level dictionary keys in a nested + dictionary) **cannot** be regular expressions. 
+ + * None: + + - This means that the `regex` argument must be a string, + compiled regular expression, or list, dict, ndarray or + Series of such elements. If `value` is also ``None`` then + this **must** be a nested dictionary or Series. + + See the examples section for examples of each of these. + value : scalar, dict, list, str, regex, default None + Value to replace any values matching `to_replace` with. + For a DataFrame a dict of values can be used to specify which + value to use for each column (columns not in the dict will not be + filled). Regular expressions, strings and lists or dicts of such + objects are also allowed. + {inplace} + limit : int, default None + Maximum size gap to forward or backward fill. + regex : bool or same types as `to_replace`, default False + Whether to interpret `to_replace` and/or `value` as regular + expressions. If this is ``True`` then `to_replace` *must* be a + string. Alternatively, this could be a regular expression or a + list, dict, or array of regular expressions in which case + `to_replace` must be ``None``. + method : {{'pad', 'ffill', 'bfill', `None`}} + The method to use for replacement, when `to_replace` is a + scalar, list or tuple and `value` is ``None``. + + .. versionchanged:: 0.23.0 + Added to DataFrame. + + Returns + ------- + {klass} + Object after replacement. + + Raises + ------ + AssertionError + * If `regex` is not a ``bool`` and `to_replace` is not + ``None``. + + TypeError + * If `to_replace` is not a scalar, array-like, ``dict``, or ``None`` + * If `to_replace` is a ``dict`` and `value` is not a ``list``, + ``dict``, ``ndarray``, or ``Series`` + * If `to_replace` is ``None`` and `regex` is not compilable + into a regular expression or is a list, dict, ndarray, or + Series. 
+ * When replacing multiple ``bool`` or ``datetime64`` objects and + the arguments to `to_replace` do not match the type of the + value being replaced + + ValueError + * If a ``list`` or an ``ndarray`` is passed to `to_replace` and + `value` but they are not the same length. + + See Also + -------- + {klass}.fillna : Fill NA values. + {klass}.where : Replace values based on boolean condition. + Series.str.replace : Simple string replacement. + + Notes + ----- + * Regex substitution is performed under the hood with ``re.sub``. The + rules for substitution for ``re.sub`` are the same. + * Regular expressions will only substitute on strings, meaning you + cannot provide, for example, a regular expression matching floating + point numbers and expect the columns in your frame that have a + numeric dtype to be matched. However, if those floating point + numbers *are* strings, then you can do this. + * This method has *a lot* of options. You are encouraged to experiment + and play with this method to gain intuition about how it works. + * When dict is used as the `to_replace` value, it is like + key(s) in the dict are the to_replace part and + value(s) in the dict are the value parameter. + + Examples + -------- + + **Scalar `to_replace` and `value`** + + >>> s = pd.Series([0, 1, 2, 3, 4]) + >>> s.replace(0, 5) + 0 5 + 1 1 + 2 2 + 3 3 + 4 4 + dtype: int64 + + >>> df = pd.DataFrame({{'A': [0, 1, 2, 3, 4], + ... 'B': [5, 6, 7, 8, 9], + ... 
'C': ['a', 'b', 'c', 'd', 'e']}}) + >>> df.replace(0, 5) + A B C + 0 5 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + **List-like `to_replace`** + + >>> df.replace([0, 1, 2, 3], 4) + A B C + 0 4 5 a + 1 4 6 b + 2 4 7 c + 3 4 8 d + 4 4 9 e + + >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) + A B C + 0 4 5 a + 1 3 6 b + 2 2 7 c + 3 1 8 d + 4 4 9 e + + >>> s.replace([1, 2], method='bfill') + 0 0 + 1 3 + 2 3 + 3 3 + 4 4 + dtype: int64 + + **dict-like `to_replace`** + + >>> df.replace({{0: 10, 1: 100}}) + A B C + 0 10 5 a + 1 100 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace({{'A': 0, 'B': 5}}, 100) + A B C + 0 100 100 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace({{'A': {{0: 100, 4: 400}}}}) + A B C + 0 100 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 400 9 e + + **Regular expression `to_replace`** + + >>> df = pd.DataFrame({{'A': ['bat', 'foo', 'bait'], + ... 'B': ['abc', 'bar', 'xyz']}}) + >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) + A B + 0 new abc + 1 foo new + 2 bait xyz + + >>> df.replace({{'A': r'^ba.$'}}, {{'A': 'new'}}, regex=True) + A B + 0 new abc + 1 foo bar + 2 bait xyz + + >>> df.replace(regex=r'^ba.$', value='new') + A B + 0 new abc + 1 foo new + 2 bait xyz + + >>> df.replace(regex={{r'^ba.$': 'new', 'foo': 'xyz'}}) + A B + 0 new abc + 1 xyz new + 2 bait xyz + + >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') + A B + 0 new abc + 1 new new + 2 bait xyz + + Note that when replacing multiple ``bool`` or ``datetime64`` objects, + the data types in the `to_replace` parameter must match the data + type of the value being replaced: + + >>> df = pd.DataFrame({{'A': [True, False, True], + ... 'B': [False, True, False]}}) + >>> df.replace({{'a string': 'new value', True: False}}) # raises + Traceback (most recent call last): + ... + TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' + + This raises a ``TypeError`` because one of the ``dict`` keys is not of + the correct type for replacement. 
+ + Compare the behavior of ``s.replace({{'a': None}})`` and + ``s.replace('a', None)`` to understand the peculiarities + of the `to_replace` parameter: + + >>> s = pd.Series([10, 'a', 'a', 'b', 'a']) + + When one uses a dict as the `to_replace` value, it is like the + value(s) in the dict are equal to the `value` parameter. + ``s.replace({{'a': None}})`` is equivalent to + ``s.replace(to_replace={{'a': None}}, value=None, method=None)``: + + >>> s.replace({{'a': None}}) + 0 10 + 1 None + 2 None + 3 b + 4 None + dtype: object + + When ``value=None`` and `to_replace` is a scalar, list or + tuple, `replace` uses the method parameter (default 'pad') to do the + replacement. So this is why the 'a' values are being replaced by 10 + in rows 1 and 2 and 'b' in row 4 in this case. + The command ``s.replace('a', None)`` is actually equivalent to + ``s.replace(to_replace='a', value=None, method='pad')``: + + >>> s.replace('a', None) + 0 10 + 1 10 + 2 10 + 3 b + 4 b + dtype: object +""" + +_shared_doc_kwargs: Dict[str, str] = dict( + inplace=""" + inplace : boolean, default False + If True, performs operation inplace and returns None.""", + replace_iloc=""" + This differs from updating with ``.loc`` or ``.iloc``, which require + you to specify a location to update with some value.""", +) From 7bdc7a843c7457943f84b5bbbfb1a37af3177474 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Mon, 27 Jul 2020 18:33:05 +0200 Subject: [PATCH 27/41] Cache errors not caught by checks --- pandas/core/indexes/base.py | 3 ++- pandas/core/indexes/multi.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3f789c1c12309..dbd88f7367655 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -85,6 +85,7 @@ import pandas.core.missing as missing from pandas.core.ops import get_op_result_name from pandas.core.ops.invalid import make_invalid_op +from 
pandas.core.shared_docs import _shared_docs from pandas.core.sorting import ensure_key_mapped from pandas.core.strings import StringMethods @@ -1367,7 +1368,7 @@ def rename(self, name, inplace=False): return self.set_names([name], inplace=inplace) @doc( - com._core_shared_docs["to_replace"], + _shared_docs["to_replace"], klass=_index_doc_kwargs["klass"], inplace=_index_doc_kwargs["inplace"], replace_iloc="", diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d014838f52a14..d13fecbb1f795 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -50,6 +50,7 @@ from pandas.core.indexes.frozen import FrozenList from pandas.core.indexes.numeric import Int64Index import pandas.core.missing as missing +from pandas.core.shared_docs import _shared_docs from pandas.core.sorting import ( get_group_index, indexer_from_factorized, @@ -3607,7 +3608,7 @@ def isin(self, values, level=None): return levs.isin(values) @doc( - com._core_shared_docs["to_replace"], + _shared_docs["to_replace"], klass=_index_doc_kwargs["klass"], inplace=_index_doc_kwargs["inplace"], replace_iloc="", From e315717b6da94de2b3e457dbc3f76ab8af7f63e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Wed, 11 Nov 2020 20:58:11 +0100 Subject: [PATCH 28/41] 'public' shared_doc_kwargs --- pandas/core/frame.py | 44 ++++++++++++++++----------------- pandas/core/generic.py | 40 +++++++++++++++--------------- pandas/core/series.py | 50 +++++++++++++++++++------------------- pandas/core/shared_docs.py | 2 +- 4 files changed, 68 insertions(+), 68 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9c21d0b944601..5dfa51e892c77 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -155,7 +155,7 @@ ) from pandas.core.reshape.melt import melt from pandas.core.series import Series -from pandas.core.shared_docs import _shared_doc_kwargs as core_shared_doc_kwargs +from pandas.core.shared_docs import 
shared_doc_kwargs as coreshared_doc_kwargs from pandas.core.sorting import get_group_index, lexsort_indexer, nargsort from pandas.io.common import get_filepath_or_buffer @@ -173,7 +173,7 @@ # --------------------------------------------------------------------- # Docstring templates -_shared_doc_kwargs = dict( +shared_doc_kwargs = dict( axes="index, columns", klass="DataFrame", axes_single_arg="{0 or 'index', 1 or 'columns'}", @@ -2255,7 +2255,7 @@ def to_feather(self, path: FilePathOrBuffer[AnyStr], **kwargs) -> None: @doc( Series.to_markdown, - klass=_shared_doc_kwargs["klass"], + klass=shared_doc_kwargs["klass"], examples="""Examples -------- >>> df = pd.DataFrame( @@ -4035,7 +4035,7 @@ def _reindex_multi(self, axes, copy, fill_value) -> DataFrame: fill_value=fill_value, ) - @doc(NDFrame.align, **_shared_doc_kwargs) + @doc(NDFrame.align, **shared_doc_kwargs) def align( self, other, @@ -4095,7 +4095,7 @@ def align( """ ) @Substitution( - **_shared_doc_kwargs, + **shared_doc_kwargs, extended_summary_sub=" column or", axis_description_sub=", and 1 identifies the columns", see_also_sub=" or columns", @@ -4104,7 +4104,7 @@ def align( def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): return super().set_axis(labels, axis=axis, inplace=inplace) - @Substitution(**_shared_doc_kwargs) + @Substitution(**shared_doc_kwargs) @Appender(NDFrame.reindex.__doc__) @rewrite_axis_style_signature( "labels", @@ -4399,7 +4399,7 @@ def rename( errors=errors, ) - @doc(NDFrame.fillna, **_shared_doc_kwargs) + @doc(NDFrame.fillna, **shared_doc_kwargs) def fillna( self, value=None, @@ -4463,9 +4463,9 @@ def pop(self, item: Label) -> Series: @doc( NDFrame.replace, - inplace=core_shared_doc_kwargs["inplace"], - replace_iloc=core_shared_doc_kwargs["replace_iloc"], - **_shared_doc_kwargs, + inplace=coreshared_doc_kwargs["inplace"], + replace_iloc=coreshared_doc_kwargs["replace_iloc"], + **shared_doc_kwargs, ) def replace( self, @@ -4520,7 +4520,7 @@ def _replace_columnwise( 
return return res.__finalize__(self) - @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) + @doc(NDFrame.shift, klass=shared_doc_kwargs["klass"]) def shift( self, periods=1, freq=None, axis=0, fill_value=lib.no_default ) -> DataFrame: @@ -4976,20 +4976,20 @@ class max type # ---------------------------------------------------------------------- # Reindex-based selection methods - @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) + @doc(NDFrame.isna, klass=shared_doc_kwargs["klass"]) def isna(self) -> DataFrame: result = self._constructor(self._mgr.isna(func=isna)) return result.__finalize__(self, method="isna") - @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) + @doc(NDFrame.isna, klass=shared_doc_kwargs["klass"]) def isnull(self) -> DataFrame: return self.isna() - @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) + @doc(NDFrame.notna, klass=shared_doc_kwargs["klass"]) def notna(self) -> DataFrame: return ~self.isna() - @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) + @doc(NDFrame.notna, klass=shared_doc_kwargs["klass"]) def notnull(self) -> DataFrame: return ~self.isna() @@ -5365,7 +5365,7 @@ def f(vals): # ---------------------------------------------------------------------- # Sorting # TODO: Just move the sort_values doc here. 
- @Substitution(**_shared_doc_kwargs) + @Substitution(**shared_doc_kwargs) @Appender(NDFrame.sort_values.__doc__) # error: Signature of "sort_values" incompatible with supertype "NDFrame" def sort_values( # type: ignore[override] @@ -6151,7 +6151,7 @@ def __rdivmod__(self, other) -> Tuple[DataFrame, DataFrame]: 3 b b NaN NaN 4.0 4.0 4 a a 5.0 5.0 5.0 5.0 """, - klass=_shared_doc_kwargs["klass"], + klass=shared_doc_kwargs["klass"], ) def compare( self, @@ -6631,7 +6631,7 @@ def update( NaN 12.3 33.0 """ ) - @Appender(_shared_docs["groupby"] % _shared_doc_kwargs) + @Appender(_shared_docs["groupby"] % shared_doc_kwargs) def groupby( self, by=None, @@ -7511,8 +7511,8 @@ def _gotitem( @doc( _shared_docs["aggregate"], - klass=_shared_doc_kwargs["klass"], - axis=_shared_doc_kwargs["axis"], + klass=shared_doc_kwargs["klass"], + axis=shared_doc_kwargs["axis"], see_also=_agg_summary_and_see_also_doc, examples=_agg_examples_doc, ) @@ -7560,8 +7560,8 @@ def _aggregate(self, arg, axis=0, *args, **kwargs): @doc( _shared_docs["transform"], - klass=_shared_doc_kwargs["klass"], - axis=_shared_doc_kwargs["axis"], + klass=shared_doc_kwargs["klass"], + axis=shared_doc_kwargs["axis"], ) def transform( self, func: AggFuncType, axis: Axis = 0, *args, **kwargs diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8555167aa91ea..54ae9254a9f99 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -104,7 +104,7 @@ from pandas.core.internals import BlockManager from pandas.core.missing import find_valid_index from pandas.core.ops import align_method_FRAME -from pandas.core.shared_docs import _shared_doc_kwargs, _shared_docs +from pandas.core.shared_docs import _shared_docs, shared_doc_kwargs from pandas.core.sorting import get_indexer_indexer from pandas.core.window import Expanding, ExponentialMovingWindow, Rolling, Window @@ -127,7 +127,7 @@ # goal is to be able to define the docs close to function, while still being # able to share _shared_docs = {**_shared_docs} 
-_shared_doc_kwargs.update( +shared_doc_kwargs.update( dict( axes="keywords for axes", klass="Series/DataFrame", @@ -4542,8 +4542,8 @@ def sort_index( return result.__finalize__(self, method="sort_index") @doc( - klass=_shared_doc_kwargs["klass"], - axes=_shared_doc_kwargs["axes"], + klass=shared_doc_kwargs["klass"], + axes=shared_doc_kwargs["axes"], optional_labels="", optional_axis="", ) @@ -5331,7 +5331,7 @@ def sample( return self.take(locs, axis=axis) @final - @doc(klass=_shared_doc_kwargs["klass"]) + @doc(klass=shared_doc_kwargs["klass"]) def pipe(self, func, *args, **kwargs): r""" Apply func(self, \*args, \*\*kwargs). @@ -6219,7 +6219,7 @@ def convert_dtypes( # ---------------------------------------------------------------------- # Filling NA's - @doc(**_shared_doc_kwargs) + @doc(**shared_doc_kwargs) def fillna( self: FrameOrSeries, value=None, @@ -6451,9 +6451,9 @@ def bfill( @doc( _shared_docs["to_replace"], - klass=_shared_doc_kwargs["klass"], - inplace=_shared_doc_kwargs["inplace"], - replace_iloc=_shared_doc_kwargs["replace_iloc"], + klass=shared_doc_kwargs["klass"], + inplace=shared_doc_kwargs["inplace"], + replace_iloc=shared_doc_kwargs["replace_iloc"], ) def replace( self, @@ -7102,7 +7102,7 @@ def asof(self, where, subset=None): # ---------------------------------------------------------------------- # Action Methods - @doc(klass=_shared_doc_kwargs["klass"]) + @doc(klass=shared_doc_kwargs["klass"]) def isna(self: FrameOrSeries) -> FrameOrSeries: """ Detect missing values. 
@@ -7165,11 +7165,11 @@ def isna(self: FrameOrSeries) -> FrameOrSeries: """ return isna(self).__finalize__(self, method="isna") - @doc(isna, klass=_shared_doc_kwargs["klass"]) + @doc(isna, klass=shared_doc_kwargs["klass"]) def isnull(self: FrameOrSeries) -> FrameOrSeries: return isna(self).__finalize__(self, method="isnull") - @doc(klass=_shared_doc_kwargs["klass"]) + @doc(klass=shared_doc_kwargs["klass"]) def notna(self: FrameOrSeries) -> FrameOrSeries: """ Detect existing (non-missing) values. @@ -7232,7 +7232,7 @@ def notna(self: FrameOrSeries) -> FrameOrSeries: """ return notna(self).__finalize__(self, method="notna") - @doc(notna, klass=_shared_doc_kwargs["klass"]) + @doc(notna, klass=shared_doc_kwargs["klass"]) def notnull(self: FrameOrSeries) -> FrameOrSeries: return notna(self).__finalize__(self, method="notnull") @@ -8328,7 +8328,7 @@ def ranker(data): return ranker(data) - @doc(_shared_docs["compare"], klass=_shared_doc_kwargs["klass"]) + @doc(_shared_docs["compare"], klass=shared_doc_kwargs["klass"]) def compare( self, other, @@ -8397,7 +8397,7 @@ def compare( return diff - @doc(**_shared_doc_kwargs) + @doc(**shared_doc_kwargs) def align( self, other, @@ -8837,7 +8837,7 @@ def _where( @final @doc( - klass=_shared_doc_kwargs["klass"], + klass=shared_doc_kwargs["klass"], cond="True", cond_rev="False", name="where", @@ -8981,7 +8981,7 @@ def where( @final @doc( where, - klass=_shared_doc_kwargs["klass"], + klass=shared_doc_kwargs["klass"], cond="False", cond_rev="True", name="mask", @@ -9015,7 +9015,7 @@ def mask( errors=errors, ) - @doc(klass=_shared_doc_kwargs["klass"]) + @doc(klass=shared_doc_kwargs["klass"]) def shift( self: FrameOrSeries, periods=1, freq=None, axis=0, fill_value=None ) -> FrameOrSeries: @@ -11075,7 +11075,7 @@ def _find_valid_index(self, how: str): return self.index[idxpos] @final - @doc(position="first", klass=_shared_doc_kwargs["klass"]) + @doc(position="first", klass=shared_doc_kwargs["klass"]) def first_valid_index(self): """ 
Return index for {position} non-NA/null value. @@ -11092,7 +11092,7 @@ def first_valid_index(self): return self._find_valid_index("first") @final - @doc(first_valid_index, position="last", klass=_shared_doc_kwargs["klass"]) + @doc(first_valid_index, position="last", klass=shared_doc_kwargs["klass"]) def last_valid_index(self): return self._find_valid_index("last") diff --git a/pandas/core/series.py b/pandas/core/series.py index 88eff719ac4c0..c229ea6fc60e5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -91,7 +91,7 @@ from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexing import check_bool_indexer from pandas.core.internals import SingleBlockManager -from pandas.core.shared_docs import _shared_doc_kwargs, _shared_docs +from pandas.core.shared_docs import _shared_docs, shared_doc_kwargs from pandas.core.sorting import ensure_key_mapped, nargsort from pandas.core.strings import StringMethods from pandas.core.tools.datetimes import to_datetime @@ -105,7 +105,7 @@ __all__ = ["Series"] -_shared_doc_kwargs.update( +shared_doc_kwargs.update( dict( axes="index", klass="Series", @@ -1427,7 +1427,7 @@ def to_string( f.write(result) @doc( - klass=_shared_doc_kwargs["klass"], + klass=shared_doc_kwargs["klass"], examples=dedent( """ Examples @@ -1731,7 +1731,7 @@ def _set_name(self, name, inplace=False) -> "Series": Name: Max Speed, dtype: float64 """ ) - @Appender(generic._shared_docs["groupby"] % _shared_doc_kwargs) + @Appender(generic._shared_docs["groupby"] % shared_doc_kwargs) def groupby( self, by=None, @@ -2875,7 +2875,7 @@ def _construct_result( 3 d b 4 e e """, - klass=_shared_doc_kwargs["klass"], + klass=shared_doc_kwargs["klass"], ) def compare( self, @@ -4002,8 +4002,8 @@ def _gotitem(self, key, ndim, subset=None) -> "Series": @doc( generic._shared_docs["aggregate"], - klass=_shared_doc_kwargs["klass"], - axis=_shared_doc_kwargs["axis"], + klass=shared_doc_kwargs["klass"], + axis=shared_doc_kwargs["axis"], 
see_also=_agg_see_also_doc, examples=_agg_examples_doc, ) @@ -4042,8 +4042,8 @@ def aggregate(self, func=None, axis=0, *args, **kwargs): @doc( _shared_docs["transform"], - klass=_shared_doc_kwargs["klass"], - axis=_shared_doc_kwargs["axis"], + klass=shared_doc_kwargs["klass"], + axis=shared_doc_kwargs["axis"], ) def transform( self, func: AggFuncType, axis: Axis = 0, *args, **kwargs @@ -4245,8 +4245,8 @@ def _needs_reindex_multi(self, axes, method, level): @doc( NDFrame.align, - klass=_shared_doc_kwargs["klass"], - axes_single_arg=_shared_doc_kwargs["axes_single_arg"], + klass=shared_doc_kwargs["klass"], + axes_single_arg=shared_doc_kwargs["axes_single_arg"], ) def align( self, @@ -4369,7 +4369,7 @@ def rename( """ ) @Substitution( - **_shared_doc_kwargs, + **shared_doc_kwargs, extended_summary_sub="", axis_description_sub="", see_also_sub="", @@ -4380,10 +4380,10 @@ def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): @doc( NDFrame.reindex, - klass=_shared_doc_kwargs["klass"], - axes=_shared_doc_kwargs["axes"], - optional_labels=_shared_doc_kwargs["optional_labels"], - optional_axis=_shared_doc_kwargs["optional_axis"], + klass=shared_doc_kwargs["klass"], + axes=shared_doc_kwargs["axes"], + optional_labels=shared_doc_kwargs["optional_labels"], + optional_axis=shared_doc_kwargs["optional_axis"], ) def reindex(self, index=None, **kwargs): return super().reindex(index=index, **kwargs) @@ -4494,7 +4494,7 @@ def drop( errors=errors, ) - @doc(NDFrame.fillna, **_shared_doc_kwargs) + @doc(NDFrame.fillna, **shared_doc_kwargs) def fillna( self, value=None, @@ -4542,9 +4542,9 @@ def pop(self, item: Label) -> Any: @doc( NDFrame.replace, - klass=_shared_doc_kwargs["klass"], - inplace=_shared_doc_kwargs["inplace"], - replace_iloc=_shared_doc_kwargs["replace_iloc"], + klass=shared_doc_kwargs["klass"], + inplace=shared_doc_kwargs["inplace"], + replace_iloc=shared_doc_kwargs["replace_iloc"], ) def replace( self, @@ -4589,7 +4589,7 @@ def _replace_single(self, 
to_replace, method, inplace, limit): return result - @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) + @doc(NDFrame.shift, klass=shared_doc_kwargs["klass"]) def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> "Series": return super().shift( periods=periods, freq=freq, axis=axis, fill_value=fill_value @@ -4810,19 +4810,19 @@ def _convert_dtypes( result = input_series.copy() return result - @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) + @doc(NDFrame.isna, klass=shared_doc_kwargs["klass"]) def isna(self) -> "Series": return generic.NDFrame.isna(self) - @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) + @doc(NDFrame.isna, klass=shared_doc_kwargs["klass"]) def isnull(self) -> "Series": return super().isnull() - @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) + @doc(NDFrame.notna, klass=shared_doc_kwargs["klass"]) def notna(self) -> "Series": return super().notna() - @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) + @doc(NDFrame.notna, klass=shared_doc_kwargs["klass"]) def notnull(self) -> "Series": return super().notnull() diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 55c1257191f3b..0d20d3e4241c9 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -618,7 +618,7 @@ dtype: object """ -_shared_doc_kwargs: Dict[str, str] = dict( +shared_doc_kwargs: Dict[str, str] = dict( inplace=""" inplace : boolean, default False If True, performs operation inplace and returns None.""", From 7406775de7adcb3c850a3f734dcf373cb46fc8dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Wed, 11 Nov 2020 21:03:19 +0100 Subject: [PATCH 29/41] revert 'public' shared_doc_kwargs adjust validation --- pandas/core/frame.py | 44 +++++++++++------------ pandas/core/generic.py | 40 ++++++++++----------- pandas/core/series.py | 50 +++++++++++++-------------- pandas/core/shared_docs.py | 2 +- scripts/validate_unwanted_patterns.py | 1 + 5 files changed, 69 insertions(+), 68 
deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5dfa51e892c77..9c21d0b944601 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -155,7 +155,7 @@ ) from pandas.core.reshape.melt import melt from pandas.core.series import Series -from pandas.core.shared_docs import shared_doc_kwargs as coreshared_doc_kwargs +from pandas.core.shared_docs import _shared_doc_kwargs as core_shared_doc_kwargs from pandas.core.sorting import get_group_index, lexsort_indexer, nargsort from pandas.io.common import get_filepath_or_buffer @@ -173,7 +173,7 @@ # --------------------------------------------------------------------- # Docstring templates -shared_doc_kwargs = dict( +_shared_doc_kwargs = dict( axes="index, columns", klass="DataFrame", axes_single_arg="{0 or 'index', 1 or 'columns'}", @@ -2255,7 +2255,7 @@ def to_feather(self, path: FilePathOrBuffer[AnyStr], **kwargs) -> None: @doc( Series.to_markdown, - klass=shared_doc_kwargs["klass"], + klass=_shared_doc_kwargs["klass"], examples="""Examples -------- >>> df = pd.DataFrame( @@ -4035,7 +4035,7 @@ def _reindex_multi(self, axes, copy, fill_value) -> DataFrame: fill_value=fill_value, ) - @doc(NDFrame.align, **shared_doc_kwargs) + @doc(NDFrame.align, **_shared_doc_kwargs) def align( self, other, @@ -4095,7 +4095,7 @@ def align( """ ) @Substitution( - **shared_doc_kwargs, + **_shared_doc_kwargs, extended_summary_sub=" column or", axis_description_sub=", and 1 identifies the columns", see_also_sub=" or columns", @@ -4104,7 +4104,7 @@ def align( def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): return super().set_axis(labels, axis=axis, inplace=inplace) - @Substitution(**shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) @Appender(NDFrame.reindex.__doc__) @rewrite_axis_style_signature( "labels", @@ -4399,7 +4399,7 @@ def rename( errors=errors, ) - @doc(NDFrame.fillna, **shared_doc_kwargs) + @doc(NDFrame.fillna, **_shared_doc_kwargs) def fillna( self, value=None, @@ -4463,9 
+4463,9 @@ def pop(self, item: Label) -> Series: @doc( NDFrame.replace, - inplace=coreshared_doc_kwargs["inplace"], - replace_iloc=coreshared_doc_kwargs["replace_iloc"], - **shared_doc_kwargs, + inplace=core_shared_doc_kwargs["inplace"], + replace_iloc=core_shared_doc_kwargs["replace_iloc"], + **_shared_doc_kwargs, ) def replace( self, @@ -4520,7 +4520,7 @@ def _replace_columnwise( return return res.__finalize__(self) - @doc(NDFrame.shift, klass=shared_doc_kwargs["klass"]) + @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) def shift( self, periods=1, freq=None, axis=0, fill_value=lib.no_default ) -> DataFrame: @@ -4976,20 +4976,20 @@ class max type # ---------------------------------------------------------------------- # Reindex-based selection methods - @doc(NDFrame.isna, klass=shared_doc_kwargs["klass"]) + @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isna(self) -> DataFrame: result = self._constructor(self._mgr.isna(func=isna)) return result.__finalize__(self, method="isna") - @doc(NDFrame.isna, klass=shared_doc_kwargs["klass"]) + @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isnull(self) -> DataFrame: return self.isna() - @doc(NDFrame.notna, klass=shared_doc_kwargs["klass"]) + @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) def notna(self) -> DataFrame: return ~self.isna() - @doc(NDFrame.notna, klass=shared_doc_kwargs["klass"]) + @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) def notnull(self) -> DataFrame: return ~self.isna() @@ -5365,7 +5365,7 @@ def f(vals): # ---------------------------------------------------------------------- # Sorting # TODO: Just move the sort_values doc here. 
- @Substitution(**shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) @Appender(NDFrame.sort_values.__doc__) # error: Signature of "sort_values" incompatible with supertype "NDFrame" def sort_values( # type: ignore[override] @@ -6151,7 +6151,7 @@ def __rdivmod__(self, other) -> Tuple[DataFrame, DataFrame]: 3 b b NaN NaN 4.0 4.0 4 a a 5.0 5.0 5.0 5.0 """, - klass=shared_doc_kwargs["klass"], + klass=_shared_doc_kwargs["klass"], ) def compare( self, @@ -6631,7 +6631,7 @@ def update( NaN 12.3 33.0 """ ) - @Appender(_shared_docs["groupby"] % shared_doc_kwargs) + @Appender(_shared_docs["groupby"] % _shared_doc_kwargs) def groupby( self, by=None, @@ -7511,8 +7511,8 @@ def _gotitem( @doc( _shared_docs["aggregate"], - klass=shared_doc_kwargs["klass"], - axis=shared_doc_kwargs["axis"], + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], see_also=_agg_summary_and_see_also_doc, examples=_agg_examples_doc, ) @@ -7560,8 +7560,8 @@ def _aggregate(self, arg, axis=0, *args, **kwargs): @doc( _shared_docs["transform"], - klass=shared_doc_kwargs["klass"], - axis=shared_doc_kwargs["axis"], + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], ) def transform( self, func: AggFuncType, axis: Axis = 0, *args, **kwargs diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 54ae9254a9f99..8555167aa91ea 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -104,7 +104,7 @@ from pandas.core.internals import BlockManager from pandas.core.missing import find_valid_index from pandas.core.ops import align_method_FRAME -from pandas.core.shared_docs import _shared_docs, shared_doc_kwargs +from pandas.core.shared_docs import _shared_doc_kwargs, _shared_docs from pandas.core.sorting import get_indexer_indexer from pandas.core.window import Expanding, ExponentialMovingWindow, Rolling, Window @@ -127,7 +127,7 @@ # goal is to be able to define the docs close to function, while still being # able to share _shared_docs = {**_shared_docs} 
-shared_doc_kwargs.update( +_shared_doc_kwargs.update( dict( axes="keywords for axes", klass="Series/DataFrame", @@ -4542,8 +4542,8 @@ def sort_index( return result.__finalize__(self, method="sort_index") @doc( - klass=shared_doc_kwargs["klass"], - axes=shared_doc_kwargs["axes"], + klass=_shared_doc_kwargs["klass"], + axes=_shared_doc_kwargs["axes"], optional_labels="", optional_axis="", ) @@ -5331,7 +5331,7 @@ def sample( return self.take(locs, axis=axis) @final - @doc(klass=shared_doc_kwargs["klass"]) + @doc(klass=_shared_doc_kwargs["klass"]) def pipe(self, func, *args, **kwargs): r""" Apply func(self, \*args, \*\*kwargs). @@ -6219,7 +6219,7 @@ def convert_dtypes( # ---------------------------------------------------------------------- # Filling NA's - @doc(**shared_doc_kwargs) + @doc(**_shared_doc_kwargs) def fillna( self: FrameOrSeries, value=None, @@ -6451,9 +6451,9 @@ def bfill( @doc( _shared_docs["to_replace"], - klass=shared_doc_kwargs["klass"], - inplace=shared_doc_kwargs["inplace"], - replace_iloc=shared_doc_kwargs["replace_iloc"], + klass=_shared_doc_kwargs["klass"], + inplace=_shared_doc_kwargs["inplace"], + replace_iloc=_shared_doc_kwargs["replace_iloc"], ) def replace( self, @@ -7102,7 +7102,7 @@ def asof(self, where, subset=None): # ---------------------------------------------------------------------- # Action Methods - @doc(klass=shared_doc_kwargs["klass"]) + @doc(klass=_shared_doc_kwargs["klass"]) def isna(self: FrameOrSeries) -> FrameOrSeries: """ Detect missing values. 
@@ -7165,11 +7165,11 @@ def isna(self: FrameOrSeries) -> FrameOrSeries: """ return isna(self).__finalize__(self, method="isna") - @doc(isna, klass=shared_doc_kwargs["klass"]) + @doc(isna, klass=_shared_doc_kwargs["klass"]) def isnull(self: FrameOrSeries) -> FrameOrSeries: return isna(self).__finalize__(self, method="isnull") - @doc(klass=shared_doc_kwargs["klass"]) + @doc(klass=_shared_doc_kwargs["klass"]) def notna(self: FrameOrSeries) -> FrameOrSeries: """ Detect existing (non-missing) values. @@ -7232,7 +7232,7 @@ def notna(self: FrameOrSeries) -> FrameOrSeries: """ return notna(self).__finalize__(self, method="notna") - @doc(notna, klass=shared_doc_kwargs["klass"]) + @doc(notna, klass=_shared_doc_kwargs["klass"]) def notnull(self: FrameOrSeries) -> FrameOrSeries: return notna(self).__finalize__(self, method="notnull") @@ -8328,7 +8328,7 @@ def ranker(data): return ranker(data) - @doc(_shared_docs["compare"], klass=shared_doc_kwargs["klass"]) + @doc(_shared_docs["compare"], klass=_shared_doc_kwargs["klass"]) def compare( self, other, @@ -8397,7 +8397,7 @@ def compare( return diff - @doc(**shared_doc_kwargs) + @doc(**_shared_doc_kwargs) def align( self, other, @@ -8837,7 +8837,7 @@ def _where( @final @doc( - klass=shared_doc_kwargs["klass"], + klass=_shared_doc_kwargs["klass"], cond="True", cond_rev="False", name="where", @@ -8981,7 +8981,7 @@ def where( @final @doc( where, - klass=shared_doc_kwargs["klass"], + klass=_shared_doc_kwargs["klass"], cond="False", cond_rev="True", name="mask", @@ -9015,7 +9015,7 @@ def mask( errors=errors, ) - @doc(klass=shared_doc_kwargs["klass"]) + @doc(klass=_shared_doc_kwargs["klass"]) def shift( self: FrameOrSeries, periods=1, freq=None, axis=0, fill_value=None ) -> FrameOrSeries: @@ -11075,7 +11075,7 @@ def _find_valid_index(self, how: str): return self.index[idxpos] @final - @doc(position="first", klass=shared_doc_kwargs["klass"]) + @doc(position="first", klass=_shared_doc_kwargs["klass"]) def first_valid_index(self): """ 
Return index for {position} non-NA/null value. @@ -11092,7 +11092,7 @@ def first_valid_index(self): return self._find_valid_index("first") @final - @doc(first_valid_index, position="last", klass=shared_doc_kwargs["klass"]) + @doc(first_valid_index, position="last", klass=_shared_doc_kwargs["klass"]) def last_valid_index(self): return self._find_valid_index("last") diff --git a/pandas/core/series.py b/pandas/core/series.py index c229ea6fc60e5..88eff719ac4c0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -91,7 +91,7 @@ from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexing import check_bool_indexer from pandas.core.internals import SingleBlockManager -from pandas.core.shared_docs import _shared_docs, shared_doc_kwargs +from pandas.core.shared_docs import _shared_doc_kwargs, _shared_docs from pandas.core.sorting import ensure_key_mapped, nargsort from pandas.core.strings import StringMethods from pandas.core.tools.datetimes import to_datetime @@ -105,7 +105,7 @@ __all__ = ["Series"] -shared_doc_kwargs.update( +_shared_doc_kwargs.update( dict( axes="index", klass="Series", @@ -1427,7 +1427,7 @@ def to_string( f.write(result) @doc( - klass=shared_doc_kwargs["klass"], + klass=_shared_doc_kwargs["klass"], examples=dedent( """ Examples @@ -1731,7 +1731,7 @@ def _set_name(self, name, inplace=False) -> "Series": Name: Max Speed, dtype: float64 """ ) - @Appender(generic._shared_docs["groupby"] % shared_doc_kwargs) + @Appender(generic._shared_docs["groupby"] % _shared_doc_kwargs) def groupby( self, by=None, @@ -2875,7 +2875,7 @@ def _construct_result( 3 d b 4 e e """, - klass=shared_doc_kwargs["klass"], + klass=_shared_doc_kwargs["klass"], ) def compare( self, @@ -4002,8 +4002,8 @@ def _gotitem(self, key, ndim, subset=None) -> "Series": @doc( generic._shared_docs["aggregate"], - klass=shared_doc_kwargs["klass"], - axis=shared_doc_kwargs["axis"], + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], 
see_also=_agg_see_also_doc, examples=_agg_examples_doc, ) @@ -4042,8 +4042,8 @@ def aggregate(self, func=None, axis=0, *args, **kwargs): @doc( _shared_docs["transform"], - klass=shared_doc_kwargs["klass"], - axis=shared_doc_kwargs["axis"], + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], ) def transform( self, func: AggFuncType, axis: Axis = 0, *args, **kwargs @@ -4245,8 +4245,8 @@ def _needs_reindex_multi(self, axes, method, level): @doc( NDFrame.align, - klass=shared_doc_kwargs["klass"], - axes_single_arg=shared_doc_kwargs["axes_single_arg"], + klass=_shared_doc_kwargs["klass"], + axes_single_arg=_shared_doc_kwargs["axes_single_arg"], ) def align( self, @@ -4369,7 +4369,7 @@ def rename( """ ) @Substitution( - **shared_doc_kwargs, + **_shared_doc_kwargs, extended_summary_sub="", axis_description_sub="", see_also_sub="", @@ -4380,10 +4380,10 @@ def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): @doc( NDFrame.reindex, - klass=shared_doc_kwargs["klass"], - axes=shared_doc_kwargs["axes"], - optional_labels=shared_doc_kwargs["optional_labels"], - optional_axis=shared_doc_kwargs["optional_axis"], + klass=_shared_doc_kwargs["klass"], + axes=_shared_doc_kwargs["axes"], + optional_labels=_shared_doc_kwargs["optional_labels"], + optional_axis=_shared_doc_kwargs["optional_axis"], ) def reindex(self, index=None, **kwargs): return super().reindex(index=index, **kwargs) @@ -4494,7 +4494,7 @@ def drop( errors=errors, ) - @doc(NDFrame.fillna, **shared_doc_kwargs) + @doc(NDFrame.fillna, **_shared_doc_kwargs) def fillna( self, value=None, @@ -4542,9 +4542,9 @@ def pop(self, item: Label) -> Any: @doc( NDFrame.replace, - klass=shared_doc_kwargs["klass"], - inplace=shared_doc_kwargs["inplace"], - replace_iloc=shared_doc_kwargs["replace_iloc"], + klass=_shared_doc_kwargs["klass"], + inplace=_shared_doc_kwargs["inplace"], + replace_iloc=_shared_doc_kwargs["replace_iloc"], ) def replace( self, @@ -4589,7 +4589,7 @@ def _replace_single(self, 
to_replace, method, inplace, limit): return result - @doc(NDFrame.shift, klass=shared_doc_kwargs["klass"]) + @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> "Series": return super().shift( periods=periods, freq=freq, axis=axis, fill_value=fill_value @@ -4810,19 +4810,19 @@ def _convert_dtypes( result = input_series.copy() return result - @doc(NDFrame.isna, klass=shared_doc_kwargs["klass"]) + @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isna(self) -> "Series": return generic.NDFrame.isna(self) - @doc(NDFrame.isna, klass=shared_doc_kwargs["klass"]) + @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isnull(self) -> "Series": return super().isnull() - @doc(NDFrame.notna, klass=shared_doc_kwargs["klass"]) + @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) def notna(self) -> "Series": return super().notna() - @doc(NDFrame.notna, klass=shared_doc_kwargs["klass"]) + @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) def notnull(self) -> "Series": return super().notnull() diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 0d20d3e4241c9..55c1257191f3b 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -618,7 +618,7 @@ dtype: object """ -shared_doc_kwargs: Dict[str, str] = dict( +_shared_doc_kwargs: Dict[str, str] = dict( inplace=""" inplace : boolean, default False If True, performs operation inplace and returns None.""", diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 9c58a55cb907e..69dc6332bfc0f 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -23,6 +23,7 @@ "_interval_shared_docs", "_merge_doc", "_shared_docs", + "_shared_doc_kwargs", "_apply_docs", "_new_Index", "_new_PeriodIndex", From 7b4a2b0e096036fcbd0a8280eafb0698fc92043a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Wed, 11 Nov 2020 
21:04:23 +0100 Subject: [PATCH 30/41] Move whatsnes --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f751a91cecf19..3811627f3d531 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -234,6 +234,7 @@ Other enhancements - Improve error reporting for :meth:`DataFrame.merge()` when invalid merge column definitions were given (:issue:`16228`) - Improve numerical stability for :meth:`Rolling.skew()`, :meth:`Rolling.kurt()`, :meth:`Expanding.skew()` and :meth:`Expanding.kurt()` through implementation of Kahan summation (:issue:`6929`) - Improved error reporting for subsetting columns of a :class:`DataFrameGroupBy` with ``axis=1`` (:issue:`37725`) +- :class:`Index` and :class:`MultiIndex` now has a `replace()` method (:issue:`19495`). .. _whatsnew_120.api_breaking.python: From 5f86494a9114ef995b9b2d2983db79d4454aaf6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Wed, 11 Nov 2020 21:53:51 +0100 Subject: [PATCH 31/41] Add replace method for categorical index --- pandas/core/indexes/category.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 24bd60a7356dd..566240a8803aa 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -29,9 +29,12 @@ from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name from pandas.core.indexes.extension import NDArrayBackedExtensionIndex, inherit_names import pandas.core.missing as missing +from pandas.core.shared_docs import _shared_docs _index_doc_kwargs = dict(ibase._index_doc_kwargs) -_index_doc_kwargs.update(dict(target_klass="CategoricalIndex")) +_index_doc_kwargs.update( + dict(target_klass="CategoricalIndex", klass="CategoricalIndex") +) @inherit_names( @@ -697,3 +700,21 @@ def _delegate_method(self, 
name: str, *args, **kwargs): if is_scalar(res): return res return CategoricalIndex(res, name=self.name) + + @doc( + _shared_docs["to_replace"], + klass=_index_doc_kwargs["klass"], + inplace=_index_doc_kwargs["inplace"], + replace_iloc="", + ) + def replace( + self, + to_replace=None, + value=None, + limit=None, + regex=False, + method="pad", + ): + return super().replace( + to_replace=to_replace, value=value, limit=limit, regex=regex, method=method + ) From 61b8ac53ba8683ee951dee28054c117c0973bf4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Mon, 16 Nov 2020 21:38:26 +0100 Subject: [PATCH 32/41] Revert changes and a better NotImplementedError message --- pandas/core/indexes/category.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 566240a8803aa..5e4a05fa94f7b 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -29,12 +29,9 @@ from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name from pandas.core.indexes.extension import NDArrayBackedExtensionIndex, inherit_names import pandas.core.missing as missing -from pandas.core.shared_docs import _shared_docs _index_doc_kwargs = dict(ibase._index_doc_kwargs) -_index_doc_kwargs.update( - dict(target_klass="CategoricalIndex", klass="CategoricalIndex") -) +_index_doc_kwargs.update(dict(target_klass="CategoricalIndex")) @inherit_names( @@ -701,12 +698,6 @@ def _delegate_method(self, name: str, *args, **kwargs): return res return CategoricalIndex(res, name=self.name) - @doc( - _shared_docs["to_replace"], - klass=_index_doc_kwargs["klass"], - inplace=_index_doc_kwargs["inplace"], - replace_iloc="", - ) def replace( self, to_replace=None, @@ -715,6 +706,6 @@ def replace( regex=False, method="pad", ): - return super().replace( - to_replace=to_replace, value=value, limit=limit, regex=regex, method=method + raise 
NotImplementedError( + "Replacing values of a CategoricalIndex is not supported." ) From 1bb05f9fa52ebcb116964ecd8faee7b879939930 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Mon, 16 Nov 2020 21:45:32 +0100 Subject: [PATCH 33/41] Grammar fix in whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 3811627f3d531..b1ea666513929 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -234,7 +234,7 @@ Other enhancements - Improve error reporting for :meth:`DataFrame.merge()` when invalid merge column definitions were given (:issue:`16228`) - Improve numerical stability for :meth:`Rolling.skew()`, :meth:`Rolling.kurt()`, :meth:`Expanding.skew()` and :meth:`Expanding.kurt()` through implementation of Kahan summation (:issue:`6929`) - Improved error reporting for subsetting columns of a :class:`DataFrameGroupBy` with ``axis=1`` (:issue:`37725`) -- :class:`Index` and :class:`MultiIndex` now has a `replace()` method (:issue:`19495`). +- :class:`Index` and :class:`MultiIndex` now have a ``replace()`` method (:issue:`19495`). .. 
_whatsnew_120.api_breaking.python: From c955338f0f0a39440560bb2ce838205a1f162b79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Mon, 16 Nov 2020 21:57:57 +0100 Subject: [PATCH 34/41] fix formatting --- pandas/tests/indexes/base_class/test_replace.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/base_class/test_replace.py b/pandas/tests/indexes/base_class/test_replace.py index e555bce9ebd63..1887b171ef5cb 100644 --- a/pandas/tests/indexes/base_class/test_replace.py +++ b/pandas/tests/indexes/base_class/test_replace.py @@ -9,7 +9,12 @@ [ ([1, 2, 3], [1, 3], ["a", "c"], ["a", 2, "c"]), ([1, 2, 3], 1, "a", ["a", 2, 3]), - ([1, None, 2], [1, 2], "a", ["a", None, "a"],), + ( + [1, None, 2], + [1, 2], + "a", + ["a", None, "a"], + ), ], ) def test_index_replace(index, to_replace, value, expected): From 01ac22f3c37f6bb984205d9fdf1b8a9b2f2c9cbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 22 Nov 2020 14:27:23 +0100 Subject: [PATCH 35/41] Revert unintended formatting changes in core/shared_docs.py --- pandas/core/shared_docs.py | 208 ++++++++++++++++++------------------- 1 file changed, 104 insertions(+), 104 deletions(-) diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 55c1257191f3b..7858be33364a4 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -145,115 +145,115 @@ _shared_docs[ "melt" ] = """ - Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. +Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. - This function is useful to massage a DataFrame into a format where one - or more columns are identifier variables (`id_vars`), while all other - columns, considered measured variables (`value_vars`), are "unpivoted" to - the row axis, leaving just two non-identifier columns, 'variable' and - 'value'. 
+This function is useful to massage a DataFrame into a format where one +or more columns are identifier variables (`id_vars`), while all other +columns, considered measured variables (`value_vars`), are "unpivoted" to +the row axis, leaving just two non-identifier columns, 'variable' and +'value'. - Parameters - ---------- - id_vars : tuple, list, or ndarray, optional - Column(s) to use as identifier variables. - value_vars : tuple, list, or ndarray, optional - Column(s) to unpivot. If not specified, uses all columns that - are not set as `id_vars`. - var_name : scalar - Name to use for the 'variable' column. If None it uses - ``frame.columns.name`` or 'variable'. - value_name : scalar, default 'value' - Name to use for the 'value' column. - col_level : int or str, optional - If columns are a MultiIndex then use this level to melt. - ignore_index : bool, default True - If True, original index is ignored. If False, the original index is retained. - Index labels will be repeated as necessary. - - .. versionadded:: 1.1.0 +Parameters +---------- +id_vars : tuple, list, or ndarray, optional + Column(s) to use as identifier variables. +value_vars : tuple, list, or ndarray, optional + Column(s) to unpivot. If not specified, uses all columns that + are not set as `id_vars`. +var_name : scalar + Name to use for the 'variable' column. If None it uses + ``frame.columns.name`` or 'variable'. +value_name : scalar, default 'value' + Name to use for the 'value' column. +col_level : int or str, optional + If columns are a MultiIndex then use this level to melt. +ignore_index : bool, default True + If True, original index is ignored. If False, the original index is retained. + Index labels will be repeated as necessary. - Returns - ------- - DataFrame - Unpivoted DataFrame. + .. versionadded:: 1.1.0 - See Also - -------- - %(other)s : Identical method. - pivot_table : Create a spreadsheet-style pivot table as a DataFrame. 
- DataFrame.pivot : Return reshaped DataFrame organized - by given index / column values. - DataFrame.explode : Explode a DataFrame from list-like - columns to long format. +Returns +------- +DataFrame + Unpivoted DataFrame. - Examples - -------- - >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, - ... 'B': {0: 1, 1: 3, 2: 5}, - ... 'C': {0: 2, 1: 4, 2: 6}}) - >>> df - A B C - 0 a 1 2 - 1 b 3 4 - 2 c 5 6 - - >>> %(caller)sid_vars=['A'], value_vars=['B']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - - >>> %(caller)sid_vars=['A'], value_vars=['B', 'C']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - 3 a C 2 - 4 b C 4 - 5 c C 6 - - The names of 'variable' and 'value' columns can be customized: - - >>> %(caller)sid_vars=['A'], value_vars=['B'], - ... var_name='myVarname', value_name='myValname') - A myVarname myValname - 0 a B 1 - 1 b B 3 - 2 c B 5 - - Original index values can be kept around: - - >>> %(caller)sid_vars=['A'], value_vars=['B', 'C'], ignore_index=False) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - 0 a C 2 - 1 b C 4 - 2 c C 6 - - If you have multi-index columns: - - >>> df.columns = [list('ABC'), list('DEF')] - >>> df - A B C - D E F - 0 a 1 2 - 1 b 3 4 - 2 c 5 6 - - >>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - - >>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')]) - (A, D) variable_0 variable_1 value - 0 a B E 1 - 1 b B E 3 - 2 c B E 5 +See Also +-------- +%(other)s : Identical method. +pivot_table : Create a spreadsheet-style pivot table as a DataFrame. +DataFrame.pivot : Return reshaped DataFrame organized + by given index / column values. +DataFrame.explode : Explode a DataFrame from list-like + columns to long format. + +Examples +-------- +>>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, +... 'B': {0: 1, 1: 3, 2: 5}, +... 
'C': {0: 2, 1: 4, 2: 6}}) +>>> df + A B C +0 a 1 2 +1 b 3 4 +2 c 5 6 + +>>> %(caller)sid_vars=['A'], value_vars=['B']) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 + +>>> %(caller)sid_vars=['A'], value_vars=['B', 'C']) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 +3 a C 2 +4 b C 4 +5 c C 6 + +The names of 'variable' and 'value' columns can be customized: + +>>> %(caller)sid_vars=['A'], value_vars=['B'], +... var_name='myVarname', value_name='myValname') + A myVarname myValname +0 a B 1 +1 b B 3 +2 c B 5 + +Original index values can be kept around: + +>>> %(caller)sid_vars=['A'], value_vars=['B', 'C'], ignore_index=False) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 +0 a C 2 +1 b C 4 +2 c C 6 + +If you have multi-index columns: + +>>> df.columns = [list('ABC'), list('DEF')] +>>> df + A B C + D E F +0 a 1 2 +1 b 3 4 +2 c 5 6 + +>>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B']) + A variable value +0 a B 1 +1 b B 3 +2 c B 5 + +>>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')]) + (A, D) variable_0 variable_1 value +0 a B E 1 +1 b B E 3 +2 c B E 5 """ _shared_docs[ From e6a4e8deb8f42a20cf8f9fb19d0c7f4e53b6261c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 22 Nov 2020 14:56:41 +0100 Subject: [PATCH 36/41] test case for change identified by the doctest & formatting by black --- pandas/tests/frame/methods/test_replace.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index baa310ddd6f09..6e6ed1b73304b 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1632,3 +1632,12 @@ def test_replace_unicode(self): result = df1.replace(columns_values_map) expected = DataFrame({"positive": np.ones(3)}) tm.assert_frame_equal(result, expected) + + def test_replace_multiple_bool_datetime_type_mismatch(self): + # See 
https://github.com/pandas-dev/pandas/pull/32542#discussion_r528338117 + df = DataFrame({"A": [True, False, True], "B": [False, True, False]}) + + result = df.replace({"a string": "new value", True: False}) + expected = DataFrame({"A": [False, False, False], "B": [False, False, False]}) + + tm.assert_frame_equal(result, expected) From 7d7e5ca18f7d1b35c25dd6c707bbf9a7841368b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 22 Nov 2020 15:01:45 +0100 Subject: [PATCH 37/41] Remove stale documentation. See https://github.com/pandas-dev/pandas/pull/32542#discussion_r528338117 --- pandas/core/shared_docs.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 7858be33364a4..96183e3cdbbc7 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -569,20 +569,6 @@ 1 new new 2 bait xyz - Note that when replacing multiple ``bool`` or ``datetime64`` objects, - the data types in the `to_replace` parameter must match the data - type of the value being replaced: - - >>> df = pd.DataFrame({{'A': [True, False, True], - ... 'B': [False, True, False]}}) - >>> df.replace({{'a string': 'new value', True: False}}) # raises - Traceback (most recent call last): - ... - TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' - - This raises a ``TypeError`` because one of the ``dict`` keys is not of - the correct type for replacement. 
- Compare the behavior of ``s.replace({{'a': None}})`` and ``s.replace('a', None)`` to understand the peculiarities of the `to_replace` parameter: From eb784b58dd31cc34d0c7901120786c1629b0e217 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 13 Dec 2020 14:59:28 +0100 Subject: [PATCH 38/41] Address comments and add tests --- pandas/core/indexes/category.py | 13 ++++- .../tests/indexes/categorical/test_replace.py | 55 +++++++++++++++++++ 2 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 pandas/tests/indexes/categorical/test_replace.py diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 75e5c3c9761a6..4ab76dcc1279a 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -665,6 +665,15 @@ def replace( regex=False, method="pad", ): - raise NotImplementedError( - "Replacing values of a CategoricalIndex is not supported." + if regex is not False: + raise NotImplementedError( + "Regex replace is not yet implemented for CategoricalIndex." 
+ ) + + new_index = self.to_series().replace( + to_replace=to_replace, value=value, limit=limit, regex=regex, method=method ) + + new_index = CategoricalIndex(new_index) + + return new_index diff --git a/pandas/tests/indexes/categorical/test_replace.py b/pandas/tests/indexes/categorical/test_replace.py new file mode 100644 index 0000000000000..a1e05b2c25148 --- /dev/null +++ b/pandas/tests/indexes/categorical/test_replace.py @@ -0,0 +1,55 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "index, to_replace, value, expected", + [ + ([1, 2, 3], 3, "a", [1, 2, "a"]), + ( + [1, None, 2], + [1, 2], + "a", + ["a", None, "a"], + ), + ], +) +def test_categorical_index_replace(index, to_replace, value, expected): + index = pd.CategoricalIndex(index) + expected = pd.CategoricalIndex(expected) + + result = index.replace(to_replace=to_replace, value=value) + + tm.assert_equal(result, expected) + + +def test_categorical_index_replace_dict_and_value(): + index = pd.CategoricalIndex([1, 2, 3]) + + msg = "Series.replace cannot use dict-like to_replace and non-None value" + with pytest.raises(ValueError, match=msg): + index.replace({1: "a", 3: "c"}, "x") + + +@pytest.mark.parametrize( + "index, to_replace, value, expected", + [ + ([1, 2, 3], [2, 3], ["b", "c"], [1, "b", "c"]), + ([1, 2, 3], 3, "c", [1, 2, "c"]), + ( + [1, None, 2], + [1, 2], + "a", + ["a", None, "a"], + ), + ], +) +def test_index_replace(index, to_replace, value, expected): + index = pd.CategoricalIndex(index) + expected = pd.CategoricalIndex(expected) + + result = index.replace(to_replace=to_replace, value=value) + + tm.assert_equal(result, expected) From 2a778bb6e2da358eee77a81f6d92a52e5f082a65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Sun, 13 Dec 2020 15:12:21 +0100 Subject: [PATCH 39/41] C408 --- pandas/core/generic.py | 14 +++++++------- pandas/core/series.py | 26 +++++++++++++------------- pandas/core/shared_docs.py 
| 8 ++++---- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 78f76e4b7d650..5f1447e4162e7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -129,15 +129,15 @@ # able to share _shared_docs = {**_shared_docs} _shared_doc_kwargs.update( - dict( - axes="keywords for axes", - klass="Series/DataFrame", - axes_single_arg="int or labels for object", - args_transpose="axes to permute (int or label for object)", - optional_by=""" + { + "axes": "keywords for axes", + "klass": "Series/DataFrame", + "axes_single_arg": "int or labels for object", + "args_transpose": "axes to permute (int or label for object)", + "optional_by": """ by : str or list of str Name or list of names to sort by""", - ) + } ) diff --git a/pandas/core/series.py b/pandas/core/series.py index cb5d76ad6c199..abfb3f9824e61 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -112,21 +112,21 @@ __all__ = ["Series"] _shared_doc_kwargs.update( - dict( - axes="index", - klass="Series", - axes_single_arg="{0 or 'index'}", - axis="""axis : {0 or 'index'} + { + "axes": "index", + "klass": "Series", + "axes_single_arg": "{0 or 'index'}", + "axis": """axis : {0 or 'index'} Parameter needed for compatibility with DataFrame.""", - inplace="""inplace : boolean, default False + "inplace": """inplace : boolean, default False If True, performs operation inplace and returns None.""", - unique="np.ndarray", - duplicated="Series", - optional_by="", - optional_mapper="", - optional_labels="", - optional_axis="", - ) + "unique": "np.ndarray", + "duplicated": "Series", + "optional_by": "", + "optional_mapper": "", + "optional_labels": "", + "optional_axis": "", + } ) diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 8520d89ea9d9e..07b54199ac0e3 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -668,11 +668,11 @@ dtype: object """ -_shared_doc_kwargs: Dict[str, str] = dict( - 
inplace=""" +_shared_doc_kwargs: Dict[str, str] = { + "inplace": """ inplace : boolean, default False If True, performs operation inplace and returns None.""", - replace_iloc=""" + "replace_iloc": """ This differs from updating with ``.loc`` or ``.iloc``, which require you to specify a location to update with some value.""", -) +} From 7fddde6b9cc5dc1ef1a944ba8a2878d24c3d754b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Fri, 15 Jan 2021 22:02:18 +0100 Subject: [PATCH 40/41] revert validate_unwanted_patterns --- scripts/validate_unwanted_patterns.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 69dc6332bfc0f..9c58a55cb907e 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -23,7 +23,6 @@ "_interval_shared_docs", "_merge_doc", "_shared_docs", - "_shared_doc_kwargs", "_apply_docs", "_new_Index", "_new_PeriodIndex", From f4fa3d644c83a1d6e5df19bee4148a461377bd11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Fri, 15 Jan 2021 22:21:46 +0100 Subject: [PATCH 41/41] Move what's new and tidy up @docs --- doc/source/whatsnew/v1.2.0.rst | 1 - doc/source/whatsnew/v1.2.1.rst | 3 +-- pandas/core/frame.py | 8 +------- pandas/core/indexes/base.py | 5 +++-- pandas/core/indexes/multi.py | 4 ++-- 5 files changed, 7 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 7cf5f3bb22582..8e9361125513b 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -307,7 +307,6 @@ Other enhancements - Implement method ``cross`` for :meth:`DataFrame.merge` and :meth:`DataFrame.join` (:issue:`5401`) - When :func:`read_csv`, :func:`read_sas` and :func:`read_json` are called with ``chunksize``/``iterator`` they can be used in a ``with`` statement as they return context-managers (:issue:`38225`) - Augmented the 
list of named colors available for styling Excel exports, enabling all of CSS4 colors (:issue:`38247`) -- :class:`Index` and :class:`MultiIndex` now have a ``replace()`` method (:issue:`19495`). .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 55fddb8b732e2..405a1352decdb 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -55,8 +55,7 @@ Other - Bumped minimum pymysql version to 0.8.1 to avoid test failures (:issue:`38344`) - Fixed build failure on MacOS 11 in Python 3.9.1 (:issue:`38766`) - Added reference to backwards incompatible ``check_freq`` arg of :func:`testing.assert_frame_equal` and :func:`testing.assert_series_equal` in :ref:`pandas 1.1.0 whats new ` (:issue:`34050`) -- -- +- :class:`Index` and :class:`MultiIndex` now have a ``replace()`` method (:issue:`19495`). .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f0e5c13e977d5..91766a3fbfb8d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -154,7 +154,6 @@ ) from pandas.core.reshape.melt import melt from pandas.core.series import Series -from pandas.core.shared_docs import _shared_doc_kwargs as core_shared_doc_kwargs from pandas.core.sorting import get_group_index, lexsort_indexer, nargsort from pandas.io.common import get_handle @@ -4555,12 +4554,7 @@ def pop(self, item: Hashable) -> Series: """ return super().pop(item=item) - @doc( - NDFrame.replace, - inplace=core_shared_doc_kwargs["inplace"], - replace_iloc=core_shared_doc_kwargs["replace_iloc"], - **_shared_doc_kwargs, - ) + @doc(NDFrame.replace, **_shared_doc_kwargs) def replace( self, to_replace=None, diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5a969023f851e..95d14b6756ea8 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -123,6 +123,7 @@ 
"raises_section": "", "unique": "Index", "duplicated": "np.ndarray", + "replace_iloc": "", } _index_shared_docs = {} str_t = str @@ -1536,10 +1537,10 @@ def rename(self, name, inplace=False): return self.set_names([name], inplace=inplace) @doc( - _shared_docs["to_replace"], + _shared_docs["replace"], klass=_index_doc_kwargs["klass"], inplace=_index_doc_kwargs["inplace"], - replace_iloc="", + replace_iloc=_index_doc_kwargs["replace_iloc"], ) def replace( self, diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index e222d9472ddb5..36f80c7ee6dcb 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3776,10 +3776,10 @@ def isin(self, values, level=None): __inv__ = make_invalid_op("__inv__") @doc( - _shared_docs["to_replace"], + _shared_docs["replace"], klass=_index_doc_kwargs["klass"], inplace=_index_doc_kwargs["inplace"], - replace_iloc="", + replace_iloc=_index_doc_kwargs["replace_iloc"], ) def replace( self,