From 83904cd6c04b4ded8366bc1756a1e62b3cb2a1d6 Mon Sep 17 00:00:00 2001
From: shubham11941140 <63910248+shubham11941140@users.noreply.github.com>
Date: Mon, 23 Aug 2021 18:55:28 +0530
Subject: [PATCH 1/2] Updated parser and test file

---
Review note (applies to both patches in this series): the line structure was
restored from a copy in which all line breaks had been collapsed. The number
of context, added and removed lines in every hunk matches its @@ header and
the diffstat. Leading indentation inside the hunks, the nesting level of the
trailing `pass`, and the whitespace inside the triple-quoted test data are
assumed, not known; confirm them against the upstream commits before applying.
The trailing `pass` added to parsers.pyx is a no-op, and the `print(...)`
wrapper inside `pytest.raises` in test_bool_and_nan_to_int is not needed for
the assertion; both are left exactly as submitted.

 pandas/_libs/parsers.pyx                 | 13 +++++++++++++
 pandas/tests/io/parser/test_na_values.py | 19 +++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 25028b06f7bad..4d87dcefa856d 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -1090,7 +1090,20 @@ cdef class TextReader:
 
         # we had a fallback parse on the dtype, so now try to cast
         # only allow safe casts, eg. with a nan you cannot safely cast to int
+        #floating casts are handled in this section
         if col_res is not None and col_dtype is not None:
+            if is_float_dtype(col_dtype) and col_res.dtype == np.bool_ :
+                mask = col_res.view(np.uint8) == na_values[np.uint8]
+                col_res = col_res.astype(col_dtype)
+                np.putmask(col_res, mask, np.nan)
+                return col_res, na_count
+            if is_integer_dtype(col_dtype) and col_res.dtype == np.bool_ :
+                if na_count > 0:
+                    raise ValueError(
+                        f"cannot safely convert passed user dtype of "
+                        f"{col_dtype} for {np.bool_} dtyped data in "
+                        f"column {i} as it is applicable only in the case of float values and not NA values")
+                pass
             try:
                 col_res = col_res.astype(col_dtype, casting='safe')
             except TypeError:
diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py
index 2880bf8690b46..ec5cdf2b65898 100644
--- a/pandas/tests/io/parser/test_na_values.py
+++ b/pandas/tests/io/parser/test_na_values.py
@@ -590,3 +590,22 @@ def test_nan_multi_index(all_parsers):
     )
 
     tm.assert_frame_equal(result, expected)
+
+def test_bool_and_nan_to_int(all_parsers):
+    p = all_parsers
+    test_data_val = """0 NaN True False"""
+    with pytest.raises(ValueError, match="convert"): print(p.read_csv(StringIO(test_data_val), dtype = "int"))
+
+def test_bool_and_nan_to_float(all_parsers):
+    p = all_parsers
+    test_data_val = """0 NaN True False"""
+    result = p.read_csv(StringIO(test_data_val), dtype = "float")
+    expected = DataFrame.from_dict({"0": [np.nan, 1.0, 0.0]})
+    tm.assert_frame_equal(result, expected)
+
+def test_bool_and_nan_to_bool(all_parsers):
+    p = all_parsers
+    test_data_val = """0 NaN True False """
+    with pytest.raises(ValueError, match="NA values"): p.read_csv(StringIO(test_data_val), dtype = "bool")
+
+

From 684559ecf25feeaddb52f7184040eb6f16a0e180 Mon Sep 17 00:00:00 2001
From: shubham11941140 <63910248+shubham11941140@users.noreply.github.com>
Date: Mon, 23 Aug 2021 19:09:58 +0530
Subject: [PATCH 2/2] Update test_na_values.py

---
 pandas/tests/io/parser/test_na_values.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py
index ec5cdf2b65898..b02ea5d9f4396 100644
--- a/pandas/tests/io/parser/test_na_values.py
+++ b/pandas/tests/io/parser/test_na_values.py
@@ -594,7 +594,8 @@ def test_nan_multi_index(all_parsers):
 def test_bool_and_nan_to_int(all_parsers):
     p = all_parsers
     test_data_val = """0 NaN True False"""
-    with pytest.raises(ValueError, match="convert"): print(p.read_csv(StringIO(test_data_val), dtype = "int"))
+    with pytest.raises(ValueError, match = "convert"):
+        print(p.read_csv(StringIO(test_data_val), dtype = "int"))
 
 def test_bool_and_nan_to_float(all_parsers):
     p = all_parsers
@@ -606,6 +607,5 @@ def test_bool_and_nan_to_float(all_parsers):
 def test_bool_and_nan_to_bool(all_parsers):
     p = all_parsers
     test_data_val = """0 NaN True False """
-    with pytest.raises(ValueError, match="NA values"): p.read_csv(StringIO(test_data_val), dtype = "bool")
-
-
+    with pytest.raises(ValueError, match = "NA values"):
+        p.read_csv(StringIO(test_data_val), dtype = "bool")