From 7c2e09270d57d51561593fa72b5f206494d88f0c Mon Sep 17 00:00:00 2001 From: C John Klehm Date: Tue, 26 Mar 2019 17:36:36 -0500 Subject: [PATCH 01/12] Pass the errors and kwargs arguments through to astype in the columns dict logic branch. --- pandas/core/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9797069566b4b..245f4403e3b5e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5716,7 +5716,8 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs): results = [] for col_name, col in self.iteritems(): if col_name in dtype: - results.append(col.astype(dtype[col_name], copy=copy)) + results.append(col.astype(dtype=dtype[col_name], copy=copy, + errors=errors, **kwargs)) else: results.append(results.append(col.copy() if copy else col)) From 11e5d421c7e9bc207f40c8057e3f0c9016a245a0 Mon Sep 17 00:00:00 2001 From: C John Klehm Date: Wed, 27 Mar 2019 10:43:23 -0500 Subject: [PATCH 02/12] test_generic.py - Add a test for astype dict errors ignore. --- pandas/tests/generic/test_generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index d3b63e428b374..510076a118c70 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -202,6 +202,12 @@ def test_downcast(self): expected = o.astype(np.int64) self._compare(result, expected) + def test_multiple_astype_casts(self): + df = DataFrame({'A': [1, 2, 'z'], 'B': [3, 4, 'z']}) + df.astype({'A': int, + 'B': 'datetime64[ns]'}, + errors='ignore') + def test_constructor_compound_dtypes(self): # see gh-5191 # Compound dtypes should raise NotImplementedError. From d7c9d5a350ba9e9fd79e0300a5867911d306c600 Mon Sep 17 00:00:00 2001 From: C John Klehm Date: Fri, 29 Mar 2019 14:07:43 -0500 Subject: [PATCH 03/12] Update the test data to reflect #25905. 
Add an explicit check for the absence of the exception we'd normally throw. Assert the resulting dataframe against a reference dataframe. Move the test case to test_dtypes. --- pandas/tests/frame/common.py | 30 ++++++++++++++++++++++++++++ pandas/tests/frame/test_dtypes.py | 20 ++++++++++++++++++- pandas/tests/generic/test_generic.py | 6 ------ 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index 5624f7c1303b6..aa5a4770f3c68 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -1,3 +1,6 @@ +from contextlib import contextmanager +from typing import Optional, Union + import numpy as np from pandas.util._decorators import cache_readonly @@ -139,3 +142,30 @@ def _check_mixed_int(df, dtype=None): assert(df.dtypes['C'] == dtypes['C']) if dtypes.get('D'): assert(df.dtypes['D'] == dtypes['D']) + + +@contextmanager +def not_raises(expected_exception: Union[Exception, ValueError], + msg: Optional[str] = None) -> None: + """Explicitly checks that a type of exception is not raised inside a + with context. + + References: + SO: how-to-use-pytest-to-check-that-error-is-not-raised + + Parameters + ---------- + expected_exception: that is verified not to be raised. + msg: if given the message to verify in addition to the exception type. 
+ """ + try: + yield + except expected_exception as e: + if not msg: + raise AssertionError( + f"Raised exception {repr(e)} when it should not!") + + elif hasattr(e, 'message') and e.message == msg: + raise AssertionError( + f"Raised exception {repr(e)} with message {e.message} " + + "when it should not!") diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 90a21961ef78d..67d0cd8e25906 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -15,7 +15,7 @@ Categorical, DataFrame, Series, Timedelta, Timestamp, _np_version_under1p14, compat, concat, date_range, option_context) from pandas.core.arrays import integer_array -from pandas.tests.frame.common import TestData +from pandas.tests.frame.common import TestData, not_raises import pandas.util.testing as tm from pandas.util.testing import ( assert_frame_equal, assert_series_equal, makeCustomDataframe as mkdf) @@ -850,6 +850,24 @@ def test_arg_for_errors_in_astype(self): df.astype(np.int8, errors='ignore') + def test_arg_for_errors_in_astype_dictlist(self): + data_df = pd.DataFrame([{'col_a': '1', 'col_b': '16.5%', + 'col_c': 'test'}, + {'col_a': '2.2', 'col_b': '15.3', + 'col_c': 'another_test'} + ]) + reference_df = pd.DataFrame([{'col_a': '1', 'col_b': '16.5%', + 'col_c': 'test'}, + {'col_a': '2.2', 'col_b': '15.3', + 'col_c': 'another_test'} + ], dtype='float64') + type_dict = {'col_a': 'float64', 'col_b': 'float64', 'col_c': 'object'} + + with not_raises(ValueError): + df_astype = data_df.astype(dtype=type_dict, errors='ignore') + + tm.assert_frame_equal(reference_df, df_astype) + @pytest.mark.parametrize('input_vals', [ ([1, 2]), (['1', '2']), diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 510076a118c70..d3b63e428b374 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -202,12 +202,6 @@ def test_downcast(self): expected = o.astype(np.int64) 
self._compare(result, expected) - def test_multiple_astype_casts(self): - df = DataFrame({'A': [1, 2, 'z'], 'B': [3, 4, 'z']}) - df.astype({'A': int, - 'B': 'datetime64[ns]'}, - errors='ignore') - def test_constructor_compound_dtypes(self): # see gh-5191 # Compound dtypes should raise NotImplementedError. From 0430b38fe3733c5f5a50489a758c8268d140dc6d Mon Sep 17 00:00:00 2001 From: C John Klehm Date: Fri, 29 Mar 2019 15:56:44 -0500 Subject: [PATCH 04/12] Add a whatsnew note about the bugfix for astype. --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 49b2349851479..d261fced0bb61 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -275,7 +275,7 @@ Numeric Conversion ^^^^^^^^^^ -- +- Bug in :func:`DataFrame.astype()` when passing a dict of columns and types the `errors` parameter was discarded by mistake. (:issue:`25905`) - - From 19afd080886393e4db02c915211d09716ee787c2 Mon Sep 17 00:00:00 2001 From: C John Klehm Date: Fri, 29 Mar 2019 16:10:29 -0500 Subject: [PATCH 05/12] Keep Python 3.5 happy by removing the f-strings. --- pandas/tests/frame/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index aa5a4770f3c68..b70e8e2e5bfe6 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -163,9 +163,9 @@ def not_raises(expected_exception: Union[Exception, ValueError], except expected_exception as e: if not msg: raise AssertionError( - f"Raised exception {repr(e)} when it should not!") + "Raised exception {} when it should not!".format(repr(e))) elif hasattr(e, 'message') and e.message == msg: raise AssertionError( - f"Raised exception {repr(e)} with message {e.message} " + - "when it should not!") + "Raised exception {} with message {} when it should not!" 
+ .format(repr(e), e.message)) From b04757a4b15621a0b4bf581dc3c23b92433535a0 Mon Sep 17 00:00:00 2001 From: C John Klehm Date: Fri, 29 Mar 2019 16:36:10 -0500 Subject: [PATCH 06/12] ValueError inherits from Exception...so not_raises can be simplified..oops. --- pandas/tests/frame/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index b70e8e2e5bfe6..020b6f3bedf05 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -145,7 +145,7 @@ def _check_mixed_int(df, dtype=None): @contextmanager -def not_raises(expected_exception: Union[Exception, ValueError], +def not_raises(expected_exception: Exception, msg: Optional[str] = None) -> None: """Explicitly checks that a type of exception is not raised inside a with context. From 3a4849f28f813a6792498018d8c45302fae469bf Mon Sep 17 00:00:00 2001 From: C John Klehm Date: Fri, 29 Mar 2019 18:21:19 -0500 Subject: [PATCH 07/12] common.py - Remove a vestigial unused import left by the not_raises simplification in the prior commit. --- pandas/tests/frame/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index 020b6f3bedf05..297f3f0f268b2 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from typing import Optional, Union +from typing import Optional import numpy as np From 1c35cc70108a0407d964c26b5cfc07d654dd4ebc Mon Sep 17 00:00:00 2001 From: C John Klehm Date: Sat, 30 Mar 2019 18:48:31 -0500 Subject: [PATCH 08/12] Simplify the test by not checking explicitly for the exception it shouldn't raise. Add a comment about addressing GH25905. Improve the dict formatting. Simplify the dict key names to make the test easier to read. 
--- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/tests/frame/common.py | 27 ------------------- pandas/tests/frame/test_dtypes.py | 43 +++++++++++++++++++------------ 3 files changed, 28 insertions(+), 44 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index d261fced0bb61..a2bacbbc09b2c 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -275,7 +275,7 @@ Numeric Conversion ^^^^^^^^^^ -- Bug in :func:`DataFrame.astype()` when passing a dict of columns and types the `errors` parameter was discarded by mistake. (:issue:`25905`) +- Bug in :func:`DataFrame.astype()` when passing a dict of columns and types the `errors` parameter was ignored. (:issue:`25905`) - - diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index 297f3f0f268b2..a736542d22a63 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -142,30 +142,3 @@ def _check_mixed_int(df, dtype=None): assert(df.dtypes['C'] == dtypes['C']) if dtypes.get('D'): assert(df.dtypes['D'] == dtypes['D']) - - -@contextmanager -def not_raises(expected_exception: Exception, - msg: Optional[str] = None) -> None: - """Explicitly checks that a type of exception is not raised inside a - with context. - - References: - SO: how-to-use-pytest-to-check-that-error-is-not-raised - - Parameters - ---------- - expected_exception: that is verified not to be raised. - msg: if given the message to verify in addition to the exception type. - """ - try: - yield - except expected_exception as e: - if not msg: - raise AssertionError( - "Raised exception {} when it should not!".format(repr(e))) - - elif hasattr(e, 'message') and e.message == msg: - raise AssertionError( - "Raised exception {} with message {} when it should not!" 
- .format(repr(e), e.message)) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 67d0cd8e25906..c5551889ab1ee 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -15,7 +15,7 @@ Categorical, DataFrame, Series, Timedelta, Timestamp, _np_version_under1p14, compat, concat, date_range, option_context) from pandas.core.arrays import integer_array -from pandas.tests.frame.common import TestData, not_raises +from pandas.tests.frame.common import TestData import pandas.util.testing as tm from pandas.util.testing import ( assert_frame_equal, assert_series_equal, makeCustomDataframe as mkdf) @@ -851,22 +851,33 @@ def test_arg_for_errors_in_astype(self): df.astype(np.int8, errors='ignore') def test_arg_for_errors_in_astype_dictlist(self): - data_df = pd.DataFrame([{'col_a': '1', 'col_b': '16.5%', - 'col_c': 'test'}, - {'col_a': '2.2', 'col_b': '15.3', - 'col_c': 'another_test'} + """Test that the errors=ignore argument silences the exception that is + thrown by default for data that cannot be converted to the specified + type with astype. 
+ + Test for GH-25905 + """ + data_df = pd.DataFrame([{'a': '1', + 'b': '16.5%', + 'c': 'test'}, + {'a': '2.2', + 'b': '15.3', + 'c': 'another_test'} ]) - reference_df = pd.DataFrame([{'col_a': '1', 'col_b': '16.5%', - 'col_c': 'test'}, - {'col_a': '2.2', 'col_b': '15.3', - 'col_c': 'another_test'} - ], dtype='float64') - type_dict = {'col_a': 'float64', 'col_b': 'float64', 'col_c': 'object'} - - with not_raises(ValueError): - df_astype = data_df.astype(dtype=type_dict, errors='ignore') - - tm.assert_frame_equal(reference_df, df_astype) + expected = pd.DataFrame([{'a': '1', + 'b': '16.5%', + 'c': 'test'}, + {'a': '2.2', + 'b': '15.3', + 'c': 'another_test'} + ], dtype='float64') + type_dict = {'a': 'float64', + 'b': 'float64', + 'c': 'object'} + + result = data_df.astype(dtype=type_dict, errors='ignore') + + tm.assert_frame_equal(expected, result) @pytest.mark.parametrize('input_vals', [ ([1, 2]), From 3190b0f1fd6863b565be1e634a77e8ed429c2fa4 Mon Sep 17 00:00:00 2001 From: C John Klehm Date: Sat, 30 Mar 2019 18:58:09 -0500 Subject: [PATCH 09/12] Remove some unused imports from the previous revisions. --- pandas/tests/frame/common.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index a736542d22a63..5624f7c1303b6 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -1,6 +1,3 @@ -from contextlib import contextmanager -from typing import Optional - import numpy as np from pandas.util._decorators import cache_readonly From ce115a8ff91207f3c29f021421ca8abfa61190a5 Mon Sep 17 00:00:00 2001 From: C John Klehm Date: Sat, 30 Mar 2019 18:59:10 -0500 Subject: [PATCH 10/12] Remove the docstring from the test. 
--- pandas/tests/frame/test_dtypes.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index c5551889ab1ee..4741ae564edc1 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -851,12 +851,7 @@ def test_arg_for_errors_in_astype(self): df.astype(np.int8, errors='ignore') def test_arg_for_errors_in_astype_dictlist(self): - """Test that the errors=ignore argument silences the exception that is - thrown by default for data that cannot be converted to the specified - type with astype. - - Test for GH-25905 - """ + # GH-25905 data_df = pd.DataFrame([{'a': '1', 'b': '16.5%', 'c': 'test'}, From f2dd79bf97a0c640a77339dfc0a25406b5a9581a Mon Sep 17 00:00:00 2001 From: C John Klehm Date: Sat, 30 Mar 2019 19:08:59 -0500 Subject: [PATCH 11/12] One line per dict formatting. --- pandas/tests/frame/test_dtypes.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 4741ae564edc1..9b9de71448d37 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -852,23 +852,14 @@ def test_arg_for_errors_in_astype(self): def test_arg_for_errors_in_astype_dictlist(self): # GH-25905 - data_df = pd.DataFrame([{'a': '1', - 'b': '16.5%', - 'c': 'test'}, - {'a': '2.2', - 'b': '15.3', - 'c': 'another_test'} - ]) - expected = pd.DataFrame([{'a': '1', - 'b': '16.5%', - 'c': 'test'}, - {'a': '2.2', - 'b': '15.3', - 'c': 'another_test'} - ], dtype='float64') - type_dict = {'a': 'float64', - 'b': 'float64', - 'c': 'object'} + data_df = pd.DataFrame([ + {'a': '1', 'b': '16.5%', 'c': 'test'}, + {'a': '2.2', 'b': '15.3', 'c': 'another_test'}]) + expected = pd.DataFrame([ + {'a': '1', 'b': '16.5%', 'c': 'test'}, + {'a': '2.2', 'b': '15.3', 'c': 'another_test'}], + dtype='float64') + type_dict = {'a': 'float64', 'b': 'float64', 'c': 
'object'} result = data_df.astype(dtype=type_dict, errors='ignore') From 343e26cb5578665299fdf1fbccae934f3c94bdd8 Mon Sep 17 00:00:00 2001 From: C John Klehm Date: Sat, 30 Mar 2019 19:32:21 -0500 Subject: [PATCH 12/12] PEP8 E241 fixes. Change the literals for the expected df to make it clearer what is converted by astype. Swap the position of result and expected to match the style of the rest of the file. --- pandas/tests/frame/test_dtypes.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 9b9de71448d37..11c6a524239a1 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -852,18 +852,17 @@ def test_arg_for_errors_in_astype(self): def test_arg_for_errors_in_astype_dictlist(self): # GH-25905 - data_df = pd.DataFrame([ - {'a': '1', 'b': '16.5%', 'c': 'test'}, - {'a': '2.2', 'b': '15.3', 'c': 'another_test'}]) + df = pd.DataFrame([ + {'a': '1', 'b': '16.5%', 'c': 'test'}, + {'a': '2.2', 'b': '15.3', 'c': 'another_test'}]) expected = pd.DataFrame([ - {'a': '1', 'b': '16.5%', 'c': 'test'}, - {'a': '2.2', 'b': '15.3', 'c': 'another_test'}], - dtype='float64') + {'a': 1.0, 'b': '16.5%', 'c': 'test'}, + {'a': 2.2, 'b': '15.3', 'c': 'another_test'}]) type_dict = {'a': 'float64', 'b': 'float64', 'c': 'object'} - result = data_df.astype(dtype=type_dict, errors='ignore') + result = df.astype(dtype=type_dict, errors='ignore') - tm.assert_frame_equal(expected, result) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize('input_vals', [ ([1, 2]),