From 22a90b681b2654d5ac82bf809073828ff4144513 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Fri, 22 May 2020 14:38:22 +0200 Subject: [PATCH 1/8] Fix: StringDtype extension assertion --- pandas/core/arrays/string_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 537b1cf3dd439..c4871d8c29439 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -193,7 +193,7 @@ def _validate(self): @classmethod def _from_sequence(cls, scalars, dtype=None, copy=False): if dtype: - assert dtype == "string" + assert isinstance(dtype, StringDtype) result = np.asarray(scalars, dtype="object") if copy and result is scalars: From 88234ad8f82e518a4cda906edf8d7482d9938e3a Mon Sep 17 00:00:00 2001 From: sbrugman Date: Fri, 22 May 2020 15:11:15 +0200 Subject: [PATCH 2/8] Tests and whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/tests/arrays/string_/test_string.py | 26 ++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 19db7dcb4b83e..a5456f0feebb3 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -870,6 +870,7 @@ ExtensionArray - Fixed bug where :meth:`StringArray.memory_usage` was not implemented (:issue:`33963`) - Fixed bug where :meth:`DataFrameGroupBy` would ignore the ``min_count`` argument for aggregations on nullable boolean dtypes (:issue:`34051`) - Fixed bug that `DataFrame(columns=.., dtype='string')` would fail (:issue:`27953`, :issue:`33623`) +- Fixed bug where extending :meth:`StringArray._from_sequence` would fail (:issue:`34309`) Other ^^^^^ diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 6f9a1a5be4c43..55d244985f438 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -332,3 +332,29 @@ def test_memory_usage(): series = pd.Series(["a", "b", "c"], dtype="string") assert 0 < series.nbytes <= series.memory_usage() < series.memory_usage(deep=True) + + +def test_extension_assert(): + import pandas as pd + from pandas import StringDtype + from pandas.core.arrays import StringArray + from pandas.core.dtypes.dtypes import register_extension_dtype + + @register_extension_dtype + class MyExtensionDtype(StringDtype): + name = "my_extension" + + def __repr__(self) -> str: + return "MyExtensionDtype" + + @classmethod + def construct_array_type(cls) -> "Type[MyExtensionStringArray]": + return MyExtensionStringArray + + class MyExtensionStringArray(StringArray): + def __init__(self, values, copy=False): + super().__init__(values, copy) + self._dtype = MyExtensionDtype() + + series = pd.Series(["test", "test2"], dtype="my_extension") + assert series.dtype == "my_extension" From 4d5be7db57ab0beddaf5c6c4b4adcd667b4c4115 Mon Sep 17 00:00:00 2001 From: sbrugman Date: Fri, 22 May 2020 16:17:38 +0200 Subject: [PATCH 3/8] Fix typing --- pandas/tests/arrays/string_/test_string.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 55d244985f438..fead40f21d0c3 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -1,4 +1,5 @@ import operator +from typing import Type import numpy as np import pytest From f485603991cead49fcca919bf05e21f571cb9908 Mon Sep 17 00:00:00 2001 From: sbrugman Date: Mon, 25 May 2020 18:11:15 +0200 Subject: [PATCH 4/8] Update tests --- pandas/tests/arrays/string_/test_string.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index fead40f21d0c3..ad97f346e3808 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -8,6 +8,10 @@ import pandas as pd import pandas._testing as tm +from pandas import StringDtype +from pandas.core.arrays import StringArray +from pandas.core.dtypes.common import is_string_dtype +from pandas.core.dtypes.dtypes import register_extension_dtype def test_repr(): @@ -335,12 +339,7 @@ def test_memory_usage(): assert 0 < series.nbytes <= series.memory_usage() < series.memory_usage(deep=True) -def test_extension_assert(): - import pandas as pd - from pandas import StringDtype - from pandas.core.arrays import StringArray - from pandas.core.dtypes.dtypes import register_extension_dtype - +def test_string_dtype_subclassing(): @register_extension_dtype class MyExtensionDtype(StringDtype): name = "my_extension" @@ -359,3 +358,5 @@ def __init__(self, values, copy=False): series = pd.Series(["test", "test2"], dtype="my_extension") assert series.dtype == "my_extension" + assert series.values == ["test", "test2"] + assert is_string_dtype(series) From 2a4ca331a8bd3c9fda7a66bd42a15aee1d1652b5 Mon Sep 17 00:00:00 2001 From: sbrugman Date: Mon, 25 May 2020 19:11:18 +0200 Subject: [PATCH 5/8] Isort --- pandas/tests/arrays/string_/test_string.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index ad97f346e3808..5db8548ed53c0 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -6,12 +6,13 @@ import pandas.util._test_decorators as td +from pandas.core.dtypes.common import is_string_dtype +from pandas.core.dtypes.dtypes import register_extension_dtype + import pandas as pd -import pandas._testing as tm from pandas import StringDtype +import pandas._testing as tm from pandas.core.arrays import StringArray -from pandas.core.dtypes.common import is_string_dtype -from pandas.core.dtypes.dtypes import register_extension_dtype def test_repr(): From 19d4bd7ede5922d91fbed5a4abc3e4971fcb91d7 Mon Sep 17 00:00:00 2001 From: sbrugman Date: Tue, 26 May 2020 19:20:50 +0200 Subject: [PATCH 6/8] Decrease the number of imports --- pandas/tests/arrays/string_/test_string.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 5db8548ed53c0..b35724fa5c6b6 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -10,9 +10,7 @@ from pandas.core.dtypes.dtypes import register_extension_dtype import pandas as pd -from pandas import StringDtype import pandas._testing as tm -from pandas.core.arrays import StringArray def test_repr(): @@ -342,7 +340,7 @@ def test_memory_usage(): def test_string_dtype_subclassing(): @register_extension_dtype - class MyExtensionDtype(StringDtype): + class MyExtensionDtype(pd.StringDtype): name = "my_extension" def __repr__(self) -> str: @@ -352,7 +350,7 @@ def __repr__(self) -> str: def construct_array_type(cls) -> "Type[MyExtensionStringArray]": return MyExtensionStringArray - class MyExtensionStringArray(StringArray): + class MyExtensionStringArray(pd.core.arrays.StringArray): def __init__(self, values, copy=False): super().__init__(values, copy) self._dtype = MyExtensionDtype() From b6c89df34ab40cc02337a73b612938cc4e4f2d96 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Wed, 23 Sep 2020 22:41:44 +0200 Subject: [PATCH 7/8] Update test_string.py --- pandas/tests/arrays/string_/test_string.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 8a5c96c2afcfd..cb105af6d6244 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -363,7 +363,7 @@ def __init__(self, values, copy=False): assert series.dtype == "my_extension" assert series.values == ["test", "test2"] assert is_string_dtype(series) - + @pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64]) def test_astype_from_float_dtype(dtype): From d7b5c9fd56f892f84dcc80e4a1d80de3e795f596 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Wed, 23 Sep 2020 23:22:39 +0200 Subject: [PATCH 8/8] Update test_string.py --- pandas/tests/arrays/string_/test_string.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index cb105af6d6244..1f3da1758bee5 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -6,8 +6,8 @@ import pandas.util._test_decorators as td +from pandas.core.dtypes.base import register_extension_dtype from pandas.core.dtypes.common import is_string_dtype -from pandas.core.dtypes.dtypes import register_extension_dtype import pandas as pd import pandas._testing as tm