From 2283ae737ba22ef69092d4bb49d69ac65666b147 Mon Sep 17 00:00:00 2001 From: saldanhad Date: Fri, 20 Sep 2024 19:02:29 +0530 Subject: [PATCH 1/5] seperate examples for pandas.str.is methods --- pandas/core/strings/accessor.py | 102 ++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 32 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 6d10365a1b968..e635d39e7c5e1 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -3455,27 +3455,40 @@ def casefold(self): Series.str.islower : Check whether all characters are lowercase. Series.str.isupper : Check whether all characters are uppercase. Series.str.istitle : Check whether all characters are titlecase. + """ + _shared_docs["isalpha"] = """ + Checks for Alphabetic Characters (only letters). - Examples + Example -------- - **Checks for Alphabetic and Numeric Characters** - + >>> s1 = pd.Series(['one', 'one1', '1', '']) - >>> s1.str.isalpha() 0 True 1 False 2 False 3 False dtype: bool + """ + _shared_docs["isnumeric"] = """ + Checks whether all characters in each string are numeric (represent numbers). + Example + -------- + >>> s1 = pd.Series(['one', 'one1', '1', '']) >>> s1.str.isnumeric() 0 False 1 False 2 True 3 False dtype: bool + """ + _shared_docs["isalnum"] = """ + Checks whether all characters in each string are alphanumeric (letters and numbers). + Example + -------- + >>> s1 = pd.Series(['one', 'one1', '1', '']) >>> s1.str.isalnum() 0 True 1 True @@ -3492,47 +3505,56 @@ def casefold(self): 1 False 2 False dtype: bool + """ + _shared_docs["isdecimal"] = """ + Checks for characters used to form numbers in base 10. - **More Detailed Checks for Numeric Characters** - - There are several different but overlapping sets of numeric characters that - can be checked for. + Example + -------- >>> s3 = pd.Series(['23', '³', '⅕', '']) - - The ``s3.str.isdecimal`` method checks for characters used to form numbers - in base 10. - >>> s3.str.isdecimal() 0 True 1 False 2 False 3 False dtype: bool - - The ``s.str.isdigit`` method is the same as ``s3.str.isdecimal`` but also - includes special digits, like superscripted and subscripted digits in + """ + _shared_docs["isdigit"] = """ + Similar to ``str.isdecimal`` but also includes special digits, like superscripted and subscripted digits in unicode. + Example + -------- + + >>> s3 = pd.Series(['23', '³', '⅕', '']) >>> s3.str.isdigit() 0 True 1 True 2 False 3 False dtype: bool - - The ``s.str.isnumeric`` method is the same as ``s3.str.isdigit`` but also - includes other characters that can represent quantities such as unicode + """ + _shared_docs["isnumeric"] = """ + Similar to ``str.isdigit`` but also includes other characters that can represent quantities such as unicode fractions. + Example + --------- + + >>> s3 = pd.Series(['23', '³', '⅕', '']) >>> s3.str.isnumeric() 0 True 1 True 2 True 3 False dtype: bool - - **Checks for Whitespace** + """ + _shared_docs["isspace"] = """ + Checks for whitespaces. + + Example + -------- >>> s4 = pd.Series([' ', '\\t\\r\\n ', '']) >>> s4.str.isspace() @@ -3540,30 +3562,46 @@ def casefold(self): 1 True 2 False dtype: bool + """ + _shared_docs["islower"] = """ + Checks for lower character case - **Checks for Character Case** + Example + -------- >>> s5 = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) - >>> s5.str.islower() 0 True 1 False 2 False 3 False dtype: bool + """ + + _shared_docs["isupper"] = """ + Checks for upper character case + Example + -------- + + >>> s5 = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) >>> s5.str.isupper() 0 False 1 False 2 True 3 False dtype: bool - + """ + _shared_docs["istitle"] =""" The ``s5.str.istitle`` method checks for whether all words are in title case (whether only the first letter of each word is capitalized). Words are assumed to be as any sequence of non-numeric characters separated by whitespace characters. + Example + ------------ + + >>> s5 = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) >>> s5.str.istitle() 0 False 1 True @@ -3583,31 +3621,31 @@ def casefold(self): # force _noarg_wrapper return type with dtype=np.dtype(bool) (GH 29624) isalnum = _map_and_wrap( - "isalnum", docstring=_shared_docs["ismethods"] % _doc_args["isalnum"] + "isalnum", docstring=(_shared_docs["ismethods"] + _shared_docs["isalnum"]) % _doc_args["isalnum"] ) isalpha = _map_and_wrap( - "isalpha", docstring=_shared_docs["ismethods"] % _doc_args["isalpha"] + "isalpha", docstring=(_shared_docs["ismethods"] + _shared_docs["isalpha"]) % _doc_args["isalpha"] ) isdigit = _map_and_wrap( - "isdigit", docstring=_shared_docs["ismethods"] % _doc_args["isdigit"] + "isdigit", docstring=(_shared_docs["ismethods"] + _shared_docs["isdigit"]) % _doc_args["isdigit"] ) isspace = _map_and_wrap( - "isspace", docstring=_shared_docs["ismethods"] % _doc_args["isspace"] + "isspace", docstring=(_shared_docs["ismethods"] + _shared_docs["isspace"]) % _doc_args["isspace"] ) islower = _map_and_wrap( - "islower", docstring=_shared_docs["ismethods"] % _doc_args["islower"] + "islower", docstring=(_shared_docs["ismethods"] + _shared_docs["islower"]) % _doc_args["islower"] ) isupper = _map_and_wrap( - "isupper", docstring=_shared_docs["ismethods"] % _doc_args["isupper"] + "isupper", docstring=(_shared_docs["ismethods"] + _shared_docs["isupper"]) % _doc_args["isupper"] ) istitle = _map_and_wrap( - "istitle", docstring=_shared_docs["ismethods"] % _doc_args["istitle"] + "istitle", docstring=(_shared_docs["ismethods"] + _shared_docs["istitle"]) % _doc_args["istitle"] ) isnumeric = _map_and_wrap( - "isnumeric", docstring=_shared_docs["ismethods"] % _doc_args["isnumeric"] + "isnumeric", docstring=(_shared_docs["ismethods"] + _shared_docs["isnumeric"]) % _doc_args["isnumeric"] ) isdecimal = _map_and_wrap( - "isdecimal", docstring=_shared_docs["ismethods"] % _doc_args["isdecimal"] + "isdecimal", docstring=(_shared_docs["ismethods"] + _shared_docs["isdecimal"]) % _doc_args["isdecimal"] ) From f19163ca5b4904c7849ece91705acf1c8ddc4d16 Mon Sep 17 00:00:00 2001 From: saldanhad Date: Fri, 20 Sep 2024 19:27:22 +0530 Subject: [PATCH 2/5] pre-commit checks done --- pandas/core/strings/accessor.py | 42 +++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index e635d39e7c5e1..9e95a024f2fb4 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -3461,7 +3461,7 @@ def casefold(self): Example -------- - + >>> s1 = pd.Series(['one', 'one1', '1', '']) >>> s1.str.isalpha() 0 True @@ -3552,7 +3552,7 @@ def casefold(self): """ _shared_docs["isspace"] = """ Checks for whitespaces. - + Example -------- @@ -3592,7 +3592,7 @@ def casefold(self): 3 False dtype: bool """ - _shared_docs["istitle"] =""" + _shared_docs["istitle"] = """ The ``s5.str.istitle`` method checks for whether all words are in title case (whether only the first letter of each word is capitalized). Words are assumed to be as any sequence of non-numeric characters separated by @@ -3621,31 +3621,49 @@ def casefold(self): # force _noarg_wrapper return type with dtype=np.dtype(bool) (GH 29624) isalnum = _map_and_wrap( - "isalnum", docstring=(_shared_docs["ismethods"] + _shared_docs["isalnum"]) % _doc_args["isalnum"] + "isalnum", + docstring=(_shared_docs["ismethods"] + _shared_docs["isalnum"]) + % _doc_args["isalnum"], ) isalpha = _map_and_wrap( - "isalpha", docstring=(_shared_docs["ismethods"] + _shared_docs["isalpha"]) % _doc_args["isalpha"] + "isalpha", + docstring=(_shared_docs["ismethods"] + _shared_docs["isalpha"]) + % _doc_args["isalpha"], ) isdigit = _map_and_wrap( - "isdigit", docstring=(_shared_docs["ismethods"] + _shared_docs["isdigit"]) % _doc_args["isdigit"] + "isdigit", + docstring=(_shared_docs["ismethods"] + _shared_docs["isdigit"]) + % _doc_args["isdigit"], ) isspace = _map_and_wrap( - "isspace", docstring=(_shared_docs["ismethods"] + _shared_docs["isspace"]) % _doc_args["isspace"] + "isspace", + docstring=(_shared_docs["ismethods"] + _shared_docs["isspace"]) + % _doc_args["isspace"], ) islower = _map_and_wrap( - "islower", docstring=(_shared_docs["ismethods"] + _shared_docs["islower"]) % _doc_args["islower"] + "islower", + docstring=(_shared_docs["ismethods"] + _shared_docs["islower"]) + % _doc_args["islower"], ) isupper = _map_and_wrap( - "isupper", docstring=(_shared_docs["ismethods"] + _shared_docs["isupper"]) % _doc_args["isupper"] + "isupper", + docstring=(_shared_docs["ismethods"] + _shared_docs["isupper"]) + % _doc_args["isupper"], ) istitle = _map_and_wrap( - "istitle", docstring=(_shared_docs["ismethods"] + _shared_docs["istitle"]) % _doc_args["istitle"] + "istitle", + docstring=(_shared_docs["ismethods"] + _shared_docs["istitle"]) + % _doc_args["istitle"], ) isnumeric = _map_and_wrap( - "isnumeric", docstring=(_shared_docs["ismethods"] + _shared_docs["isnumeric"]) % _doc_args["isnumeric"] + "isnumeric", + docstring=(_shared_docs["ismethods"] + _shared_docs["isnumeric"]) + % _doc_args["isnumeric"], ) isdecimal = _map_and_wrap( - "isdecimal", docstring=(_shared_docs["ismethods"] + _shared_docs["isdecimal"]) % _doc_args["isdecimal"] + "isdecimal", + docstring=(_shared_docs["ismethods"] + _shared_docs["isdecimal"]) + % _doc_args["isdecimal"], ) From db61289e5950106da7bd48f0e19c069c7e385b14 Mon Sep 17 00:00:00 2001 From: saldanhad Date: Thu, 26 Sep 2024 14:49:30 +0530 Subject: [PATCH 3/5] refactor docstrings for ismethods --- pandas/core/strings/accessor.py | 178 ++++++++++++++++++++++---------- 1 file changed, 122 insertions(+), 56 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 9e95a024f2fb4..0eb756cab90fa 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -3443,10 +3443,10 @@ def casefold(self): Series or Index of bool Series or Index of boolean values with the same length as the original Series/Index. - + """ + _shared_docs["isalpha"] = """ See Also -------- - Series.str.isalpha : Check whether all characters are alphabetic. Series.str.isnumeric : Check whether all characters are numeric. Series.str.isalnum : Check whether all characters are alphanumeric. Series.str.isdigit : Check whether all characters are digits. @@ -3455,11 +3455,8 @@ def casefold(self): Series.str.islower : Check whether all characters are lowercase. Series.str.isupper : Check whether all characters are uppercase. Series.str.istitle : Check whether all characters are titlecase. - """ - _shared_docs["isalpha"] = """ - Checks for Alphabetic Characters (only letters). - Example + Examples -------- >>> s1 = pd.Series(['one', 'one1', '1', '']) @@ -3471,10 +3468,23 @@ def casefold(self): dtype: bool """ _shared_docs["isnumeric"] = """ - Checks whether all characters in each string are numeric (represent numbers). + See Also + -------- + Series.str.isalpha : Check whether all characters are alphabetic. + Series.str.isalnum : Check whether all characters are alphanumeric. + Series.str.isdigit : Check whether all characters are digits. + Series.str.isdecimal : Check whether all characters are decimal. + Series.str.isspace : Check whether all characters are whitespace. + Series.str.islower : Check whether all characters are lowercase. + Series.str.isupper : Check whether all characters are uppercase. + Series.str.istitle : Check whether all characters are titlecase. - Example + Examples -------- + The ``s.str.isnumeric`` method is the same as ``s3.str.isdigit`` but + also includes other characters that can represent quantities such as + unicode fractions. + >>> s1 = pd.Series(['one', 'one1', '1', '']) >>> s1.str.isnumeric() 0 False @@ -3484,9 +3494,18 @@ def casefold(self): dtype: bool """ _shared_docs["isalnum"] = """ - Checks whether all characters in each string are alphanumeric (letters and numbers). + See Also + -------- + Series.str.isalpha : Check whether all characters are alphabetic. + Series.str.isnumeric : Check whether all characters are numeric. + Series.str.isdigit : Check whether all characters are digits. + Series.str.isdecimal : Check whether all characters are decimal. + Series.str.isspace : Check whether all characters are whitespace. + Series.str.islower : Check whether all characters are lowercase. + Series.str.isupper : Check whether all characters are uppercase. + Series.str.istitle : Check whether all characters are titlecase. - Example + Examples -------- >>> s1 = pd.Series(['one', 'one1', '1', '']) >>> s1.str.isalnum() @@ -3507,10 +3526,21 @@ def casefold(self): dtype: bool """ _shared_docs["isdecimal"] = """ - Checks for characters used to form numbers in base 10. + See Also + -------- + Series.str.isalpha : Check whether all characters are alphabetic. + Series.str.isnumeric : Check whether all characters are numeric. + Series.str.isalnum : Check whether all characters are alphanumeric. + Series.str.isdigit : Check whether all characters are digits. + Series.str.isspace : Check whether all characters are whitespace. + Series.str.islower : Check whether all characters are lowercase. + Series.str.isupper : Check whether all characters are uppercase. + Series.str.istitle : Check whether all characters are titlecase. - Example + Examples -------- + The ``s3.str.isdecimal`` method checks for characters used to form + numbers in base 10. >>> s3 = pd.Series(['23', '³', '⅕', '']) >>> s3.str.isdecimal() @@ -3521,11 +3551,24 @@ def casefold(self): dtype: bool """ _shared_docs["isdigit"] = """ - Similar to ``str.isdecimal`` but also includes special digits, like superscripted and subscripted digits in - unicode. + See Also + -------- + Series.str.isalpha : Check whether all characters are alphabetic. + Series.str.isnumeric : Check whether all characters are numeric. + Series.str.isalnum : Check whether all characters are alphanumeric. + Series.str.isdecimal : Check whether all characters are decimal. + Series.str.isspace : Check whether all characters are whitespace. + Series.str.islower : Check whether all characters are lowercase. + Series.str.isupper : Check whether all characters are uppercase. + Series.str.istitle : Check whether all characters are titlecase. + + Similar to ``str.isdecimal`` but also includes special digits, like + superscripted and subscripted digits in unicode. - Example + Examples -------- + Similar to ``str.isdecimal`` but also includes special digits, like + superscripted and subscripted digits in unicode. >>> s3 = pd.Series(['23', '³', '⅕', '']) >>> s3.str.isdigit() @@ -3535,25 +3578,20 @@ def casefold(self): 3 False dtype: bool """ - _shared_docs["isnumeric"] = """ - Similar to ``str.isdigit`` but also includes other characters that can represent quantities such as unicode - fractions. - - Example - --------- - >>> s3 = pd.Series(['23', '³', '⅕', '']) - >>> s3.str.isnumeric() - 0 True - 1 True - 2 True - 3 False - dtype: bool - """ _shared_docs["isspace"] = """ - Checks for whitespaces. + See Also + -------- + Series.str.isalpha : Check whether all characters are alphabetic. + Series.str.isnumeric : Check whether all characters are numeric. + Series.str.isalnum : Check whether all characters are alphanumeric. + Series.str.isdigit : Check whether all characters are digits. + Series.str.isdecimal : Check whether all characters are decimal. + Series.str.islower : Check whether all characters are lowercase. + Series.str.isupper : Check whether all characters are uppercase. + Series.str.istitle : Check whether all characters are titlecase. - Example + Examples -------- >>> s4 = pd.Series([' ', '\\t\\r\\n ', '']) @@ -3564,9 +3602,18 @@ def casefold(self): dtype: bool """ _shared_docs["islower"] = """ - Checks for lower character case + See Also + -------- + Series.str.isalpha : Check whether all characters are alphabetic. + Series.str.isnumeric : Check whether all characters are numeric. + Series.str.isalnum : Check whether all characters are alphanumeric. + Series.str.isdigit : Check whether all characters are digits. + Series.str.isdecimal : Check whether all characters are decimal. + Series.str.isspace : Check whether all characters are whitespace. + Series.str.isupper : Check whether all characters are uppercase. + Series.str.istitle : Check whether all characters are titlecase. - Example + Examples -------- >>> s5 = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) @@ -3579,9 +3626,18 @@ def casefold(self): """ _shared_docs["isupper"] = """ - Checks for upper character case + See Also + -------- + Series.str.isalpha : Check whether all characters are alphabetic. + Series.str.isnumeric : Check whether all characters are numeric. + Series.str.isalnum : Check whether all characters are alphanumeric. + Series.str.isdigit : Check whether all characters are digits. + Series.str.isdecimal : Check whether all characters are decimal. + Series.str.isspace : Check whether all characters are whitespace. + Series.str.islower : Check whether all characters are lowercase. + Series.str.istitle : Check whether all characters are titlecase. - Example + Examples -------- >>> s5 = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) @@ -3593,14 +3649,24 @@ def casefold(self): dtype: bool """ _shared_docs["istitle"] = """ + See Also + -------- + Series.str.isalpha : Check whether all characters are alphabetic. + Series.str.isnumeric : Check whether all characters are numeric. + Series.str.isalnum : Check whether all characters are alphanumeric. + Series.str.isdigit : Check whether all characters are digits. + Series.str.isdecimal : Check whether all characters are decimal. + Series.str.isspace : Check whether all characters are whitespace. + Series.str.islower : Check whether all characters are lowercase. + Series.str.isupper : Check whether all characters are uppercase. + + Examples + ------------ The ``s5.str.istitle`` method checks for whether all words are in title case (whether only the first letter of each word is capitalized). Words are assumed to be as any sequence of non-numeric characters separated by whitespace characters. - Example - ------------ - >>> s5 = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) >>> s5.str.istitle() 0 False @@ -3622,48 +3688,48 @@ def casefold(self): isalnum = _map_and_wrap( "isalnum", - docstring=(_shared_docs["ismethods"] + _shared_docs["isalnum"]) - % _doc_args["isalnum"], + docstring=_shared_docs["ismethods"] % _doc_args["isalnum"] + + _shared_docs["isalnum"], ) isalpha = _map_and_wrap( "isalpha", - docstring=(_shared_docs["ismethods"] + _shared_docs["isalpha"]) - % _doc_args["isalpha"], + docstring=_shared_docs["ismethods"] % _doc_args["isdigit"] + + _shared_docs["isalpha"], ) isdigit = _map_and_wrap( "isdigit", - docstring=(_shared_docs["ismethods"] + _shared_docs["isdigit"]) - % _doc_args["isdigit"], + docstring=_shared_docs["ismethods"] % _doc_args["isdigit"] + + _shared_docs["isdigit"], ) isspace = _map_and_wrap( "isspace", - docstring=(_shared_docs["ismethods"] + _shared_docs["isspace"]) - % _doc_args["isspace"], + docstring=_shared_docs["ismethods"] % _doc_args["isspace"] + + _shared_docs["isspace"], ) islower = _map_and_wrap( "islower", - docstring=(_shared_docs["ismethods"] + _shared_docs["islower"]) - % _doc_args["islower"], + docstring=_shared_docs["ismethods"] % _doc_args["islower"] + + _shared_docs["islower"], ) isupper = _map_and_wrap( "isupper", - docstring=(_shared_docs["ismethods"] + _shared_docs["isupper"]) - % _doc_args["isupper"], + docstring=_shared_docs["ismethods"] % _doc_args["isupper"] + + _shared_docs["isupper"], ) istitle = _map_and_wrap( "istitle", - docstring=(_shared_docs["ismethods"] + _shared_docs["istitle"]) - % _doc_args["istitle"], + docstring=_shared_docs["ismethods"] % _doc_args["istitle"] + + _shared_docs["istitle"], ) isnumeric = _map_and_wrap( "isnumeric", - docstring=(_shared_docs["ismethods"] + _shared_docs["isnumeric"]) - % _doc_args["isnumeric"], + docstring=_shared_docs["ismethods"] % _doc_args["isnumeric"] + + _shared_docs["isnumeric"], ) isdecimal = _map_and_wrap( "isdecimal", - docstring=(_shared_docs["ismethods"] + _shared_docs["isdecimal"]) - % _doc_args["isdecimal"], + docstring=_shared_docs["ismethods"] % _doc_args["isdecimal"] + + _shared_docs["isdecimal"], ) From 756b4cc38e9042d6b9ad5e0a805747d7514ba933 Mon Sep 17 00:00:00 2001 From: saldanhad Date: Sat, 28 Sep 2024 18:52:13 +0530 Subject: [PATCH 4/5] remove duplicated line for isdecimal --- pandas/core/strings/accessor.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 0eb756cab90fa..ff5120a134313 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -3562,9 +3562,6 @@ def casefold(self): Series.str.isupper : Check whether all characters are uppercase. Series.str.istitle : Check whether all characters are titlecase. - Similar to ``str.isdecimal`` but also includes special digits, like - superscripted and subscripted digits in unicode. - Examples -------- Similar to ``str.isdecimal`` but also includes special digits, like From 59c11f41c86c24debc65c8788df3f26adb8656fc Mon Sep 17 00:00:00 2001 From: saldanhad Date: Sun, 29 Sep 2024 19:23:19 +0530 Subject: [PATCH 5/5] implement changes post review --- pandas/core/strings/accessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index ff5120a134313..10117aa6bf503 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -3690,7 +3690,7 @@ def casefold(self): ) isalpha = _map_and_wrap( "isalpha", - docstring=_shared_docs["ismethods"] % _doc_args["isdigit"] + docstring=_shared_docs["ismethods"] % _doc_args["isalpha"] + _shared_docs["isalpha"], ) isdigit = _map_and_wrap(