From 65d8e9ad96b7c0228dd907f133db520e0c6daf27 Mon Sep 17 00:00:00 2001 From: Santhosh18 Date: Tue, 30 Jun 2020 18:02:42 +0530 Subject: [PATCH 1/2] Added support for '0' and '1' in BooleanArray._from_sequence_of_strings method --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/arrays/boolean.py | 4 ++-- pandas/tests/arrays/boolean/test_construction.py | 5 +++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 040253ebe7279..a8d5763ae5abc 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -328,6 +328,7 @@ Other enhancements - :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`). - :meth:`DataFrame.to_excel` can now also write OpenOffice spreadsheet (.ods) files (:issue:`27222`) - :meth:`~Series.explode` now accepts ``ignore_index`` to reset the index, similarly to :meth:`pd.concat` or :meth:`DataFrame.sort_values` (:issue:`34932`). +- :meth:`BooleanArray._from_sequence_of_strings` now accepts "0", "0.0", "1", "1.0" (:issue:`34859`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 9f1c2c6e668ad..dbce71b77a425 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -286,9 +286,9 @@ def _from_sequence_of_strings( def map_string(s): if isna(s): return s - elif s in ["True", "TRUE", "true"]: + elif s in ["True", "TRUE", "true", "1", "1.0"]: return True - elif s in ["False", "FALSE", "false"]: + elif s in ["False", "FALSE", "false", "0", "0.0"]: return False else: raise ValueError(f"{s} cannot be cast to bool") diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index f7354a089df3b..be19211947d63 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -247,10 +247,11 @@ def test_coerce_to_numpy_array(): def test_to_boolean_array_from_strings(): result = BooleanArray._from_sequence_of_strings( - np.array(["True", "False", np.nan], dtype=object) + np.array(["True", "False", "1.0", "0", np.nan], dtype=object) ) expected = BooleanArray( - np.array([True, False, False]), np.array([False, False, True]) + np.array([True, False, True, False, False]), + np.array([False, False, False, False, True]), ) tm.assert_extension_array_equal(result, expected) From 5f436ca12efdfe9a779fb6b05219e90f6b1a4cb4 Mon Sep 17 00:00:00 2001 From: Santhosh18 Date: Wed, 1 Jul 2020 17:53:36 +0530 Subject: [PATCH 2/2] Added test for read_csv and modified release notes --- doc/source/whatsnew/v1.1.0.rst | 2 +- .../tests/arrays/boolean/test_construction.py | 6 ++--- pandas/tests/io/parser/test_dtypes.py | 22 ++++++++++++++++++- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a8d5763ae5abc..74dca91636577 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -328,7 +328,7 @@ Other enhancements - :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`). - :meth:`DataFrame.to_excel` can now also write OpenOffice spreadsheet (.ods) files (:issue:`27222`) - :meth:`~Series.explode` now accepts ``ignore_index`` to reset the index, similarly to :meth:`pd.concat` or :meth:`DataFrame.sort_values` (:issue:`34932`). -- :meth:`BooleanArray._from_sequence_of_strings` now accepts "0", "0.0", "1", "1.0" (:issue:`34859`) +- :meth:`read_csv` now accepts string values like "0", "0.0", "1", "1.0" as convertible to the nullable boolean dtype (:issue:`34859`) .. --------------------------------------------------------------------------- diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index be19211947d63..2f5c61304d415 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -247,11 +247,11 @@ def test_coerce_to_numpy_array(): def test_to_boolean_array_from_strings(): result = BooleanArray._from_sequence_of_strings( - np.array(["True", "False", "1.0", "0", np.nan], dtype=object) + np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object) ) expected = BooleanArray( - np.array([True, False, True, False, False]), - np.array([False, False, False, False, True]), + np.array([True, False, True, True, False, False, False]), + np.array([False, False, False, False, False, False, True]), ) tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/io/parser/test_dtypes.py b/pandas/tests/io/parser/test_dtypes.py index 6298d1e5498f3..6ac310e3b2227 100644 --- a/pandas/tests/io/parser/test_dtypes.py +++ b/pandas/tests/io/parser/test_dtypes.py @@ -561,9 +561,13 @@ def test_boolean_dtype(all_parsers): "True", "TRUE", "true", + "1", + "1.0", "False", "FALSE", "false", + "0", + "0.0", "NaN", "nan", "NA", @@ -576,7 +580,23 @@ def test_boolean_dtype(all_parsers): expected = pd.DataFrame( { "a": pd.array( - [True, True, True, False, False, False, None, None, None, None, None], + [ + True, + True, + True, + True, + True, + False, + False, + False, + False, + False, + None, + None, + None, + None, + None, + ], dtype="boolean", ) }