From 04c6d2bd8b195e53d3f37dfc9fff7cddbd12867d Mon Sep 17 00:00:00 2001 From: Matt Richards Date: Sun, 31 Oct 2021 16:14:04 +1000 Subject: [PATCH 01/10] TST: add test for GH43668 --- pandas/tests/frame/test_subclass.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 42474ff00ad6d..132cdfbf34a6f 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -12,6 +12,17 @@ ) import pandas._testing as tm +@pytest.fixture() +def gpd_style_subclass_df(): + + class SubclassedDataFrame(DataFrame): + + @property + def _constructor(self): + return SubclassedDataFrame + + return SubclassedDataFrame({'a': [1, 2, 3]}) + class TestDataFrameSubclassing: def test_frame_subclassing_and_slicing(self): @@ -704,6 +715,17 @@ def test_idxmax_preserves_subclass(self): result = df.idxmax() assert isinstance(result, tm.SubclassedSeries) + def test_convert_dtypes_preserves_subclass(self, gpd_style_subclass_df): + # GH 43668 + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = df.convert_dtypes() + assert isinstance(result, tm.SubclassedDataFrame) + + result = gpd_style_subclass_df.convert_dtypes() + print(type(result), type(gpd_style_subclass_df)) + assert isinstance(result, type(gpd_style_subclass_df)) + + def test_equals_subclass(self): # https://github.com/pandas-dev/pandas/pull/34402 # allow subclass in both directions From abc749931a633dfa080f74f67f3fc8de2bc484a3 Mon Sep 17 00:00:00 2001 From: Matt Richards Date: Sun, 31 Oct 2021 16:14:44 +1000 Subject: [PATCH 02/10] BUG: fix convert_dtypes GH43668 --- pandas/core/generic.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 44cddd7bea81c..a74a6a0e35fb5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6216,8 +6216,11 @@ def convert_dtypes( for col_name, col in self.items() ] if len(results) > 0: + result = self._constructor( + concat(results, axis=1, copy=False) + ).__finalize__(self, method="convert_dtypes") # https://github.com/python/mypy/issues/8354 - return cast(NDFrameT, concat(results, axis=1, copy=False)) + return cast(NDFrameT, result) else: return self.copy() From f3768a5bb8a7384d43f66ed4edbc941736c0b0bb Mon Sep 17 00:00:00 2001 From: Matt Richards Date: Sun, 31 Oct 2021 16:53:33 +1000 Subject: [PATCH 03/10] CLN: linting --- pandas/tests/frame/test_subclass.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 132cdfbf34a6f..70a04a654f078 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -12,16 +12,15 @@ ) import pandas._testing as tm + @pytest.fixture() def gpd_style_subclass_df(): - class SubclassedDataFrame(DataFrame): - @property def _constructor(self): return SubclassedDataFrame - return SubclassedDataFrame({'a': [1, 2, 3]}) + return SubclassedDataFrame({"a": [1, 2, 3]}) class TestDataFrameSubclassing: @@ -725,7 +724,6 @@ def test_convert_dtypes_preserves_subclass(self, gpd_style_subclass_df): print(type(result), type(gpd_style_subclass_df)) assert isinstance(result, type(gpd_style_subclass_df)) - def test_equals_subclass(self): # https://github.com/pandas-dev/pandas/pull/34402 # allow subclass in both directions From 7aeb38ef1d4c94fdb7fe23d4800c74b5843beda2 Mon Sep 17 00:00:00 2001 From: Matt Richards Date: Sun, 31 Oct 2021 16:55:26 +1000 Subject: [PATCH 04/10] TST: update finalize calls test --- pandas/tests/generic/test_finalize.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index c1f8b5dd7cf41..135e8cc7b7aba 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -347,10 +347,7 @@ operator.methodcaller("infer_objects"), ), (pd.Series, ([1, 2],), operator.methodcaller("convert_dtypes")), - pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("convert_dtypes")), - marks=not_implemented_mark, - ), + (pd.DataFrame, frame_data, operator.methodcaller("convert_dtypes")), (pd.Series, ([1, None, 3],), operator.methodcaller("interpolate")), (pd.DataFrame, ({"A": [1, None, 3]},), operator.methodcaller("interpolate")), (pd.Series, ([1, 2],), operator.methodcaller("clip", lower=1)), From 74d7bb8dd866409a7d768d04f19c0eca0d26ee3c Mon Sep 17 00:00:00 2001 From: Matt Richards Date: Sun, 31 Oct 2021 17:45:48 +1000 Subject: [PATCH 05/10] DOC: add what's new entry --- doc/source/whatsnew/v1.4.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index ee8605781a4dd..ad38d3c51dbf8 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -500,6 +500,8 @@ Conversion - Bug in :class:`Series` constructor returning 0 for missing values with dtype ``int64`` and ``False`` for dtype ``bool`` (:issue:`43017`, :issue:`43018`) - Bug in :class:`IntegerDtype` not allowing coercion from string dtype (:issue:`25472`) - Bug in :func:`to_datetime` with ``arg:xr.DataArray`` and ``unit="ns"`` specified raises TypeError (:issue:`44053`) +- Bug in :meth:`DataFrame.convert_dtypes` not returning the correct type when a subclass does not overload +:meth:`_constructor_sliced` (:issue:`43201`) - Strings From 05c922d2e0296b4bc6c47685dfd0e09fab2e727f Mon Sep 17 00:00:00 2001 From: Matt Richards Date: Mon, 1 Nov 2021 19:36:45 +1000 Subject: [PATCH 06/10] CLN split line into multiple lines --- pandas/core/generic.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a74a6a0e35fb5..7e6196c60cab1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6216,9 +6216,8 @@ def convert_dtypes( for col_name, col in self.items() ] if len(results) > 0: - result = self._constructor( - concat(results, axis=1, copy=False) - ).__finalize__(self, method="convert_dtypes") + result = self._constructor(concat(results, axis=1, copy=False)) + result = result.__finalize__(self, method="convert_dtypes") # https://github.com/python/mypy/issues/8354 return cast(NDFrameT, result) else: From 28978227fd50b654a490abd81235501adff35179 Mon Sep 17 00:00:00 2001 From: Matt Richards Date: Tue, 2 Nov 2021 19:44:46 +1000 Subject: [PATCH 07/10] DOC: fix linebreak in rst --- doc/source/whatsnew/v1.4.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 2fa9f6c6fb7ee..0b1500666e765 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -503,8 +503,7 @@ Conversion - Bug in :class:`Series` constructor returning 0 for missing values with dtype ``int64`` and ``False`` for dtype ``bool`` (:issue:`43017`, :issue:`43018`) - Bug in :class:`IntegerDtype` not allowing coercion from string dtype (:issue:`25472`) - Bug in :func:`to_datetime` with ``arg:xr.DataArray`` and ``unit="ns"`` specified raises TypeError (:issue:`44053`) -- Bug in :meth:`DataFrame.convert_dtypes` not returning the correct type when a subclass does not overload -:meth:`_constructor_sliced` (:issue:`43201`) +- Bug in :meth:`DataFrame.convert_dtypes` not returning the correct type when a subclass does not overload :meth:`_constructor_sliced` (:issue:`43201`) - Strings From 7fa2f7ebb202f3ce272312469161461e44fa452f Mon Sep 17 00:00:00 2001 From: Matt Richards Date: Sat, 6 Nov 2021 17:40:55 +1000 Subject: [PATCH 08/10] TST:bad fix to mypy issues --- pandas/core/generic.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bd5244c95a59d..b70b72e1e75b9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -18,6 +18,7 @@ Literal, Mapping, Sequence, + Type, cast, final, overload, @@ -6219,7 +6220,9 @@ def convert_dtypes( for col_name, col in self.items() ] if len(results) > 0: - result = self._constructor(concat(results, axis=1, copy=False)) + result = concat(results, axis=1, copy=False) + cons = cast(Type[DataFrame], self._constructor) + result = cons(result) result = result.__finalize__(self, method="convert_dtypes") # https://github.com/python/mypy/issues/8354 return cast(NDFrameT, result) From 299d1393ee19b0a630529a30bcc9aa9e2634e81b Mon Sep 17 00:00:00 2001 From: Matt Richards Date: Sat, 6 Nov 2021 18:13:52 +1000 Subject: [PATCH 09/10] TST: mypy fix broke tests --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e9c4f70958513..e3874d9c0b433 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6221,7 +6221,7 @@ def convert_dtypes( ] if len(results) > 0: result = concat(results, axis=1, copy=False) - cons = cast(Type[DataFrame], self._constructor) + cons = cast(Type["DataFrame"], self._constructor) result = cons(result) result = result.__finalize__(self, method="convert_dtypes") # https://github.com/python/mypy/issues/8354 From 7a573e23f508232e4d7d0f9638bb69ff5d2e994e Mon Sep 17 00:00:00 2001 From: Matt Richards Date: Sun, 7 Nov 2021 10:56:40 +1000 Subject: [PATCH 10/10] CLN: remove print I left in --- pandas/tests/frame/test_subclass.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 70a04a654f078..8d9957b24300f 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -721,7 +721,6 @@ def test_convert_dtypes_preserves_subclass(self, gpd_style_subclass_df): assert isinstance(result, tm.SubclassedDataFrame) result = gpd_style_subclass_df.convert_dtypes() - print(type(result), type(gpd_style_subclass_df)) assert isinstance(result, type(gpd_style_subclass_df)) def test_equals_subclass(self):