diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index a49b29d691692..192b465f6b04a 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -1186,6 +1186,7 @@ ExtensionArray
 - Fixed bug that caused :meth:`Series.__repr__()` to crash for extension types whose elements are multidimensional arrays (:issue:`33770`).
 - Fixed bug where :meth:`Series.update` would raise a ``ValueError`` for ``ExtensionArray`` dtypes with missing values (:issue:`33980`)
 - Fixed bug where :meth:`StringArray.memory_usage` was not implemented (:issue:`33963`)
+- Fixed bug where :meth:`StringArray._from_sequence` would fail for subclasses of :class:`StringDtype` (:issue:`34309`)
 - Fixed bug where :meth:`DataFrameGroupBy` would ignore the ``min_count`` argument for aggregations on nullable Boolean dtypes (:issue:`34051`)
 - Fixed bug where the constructor of :class:`DataFrame` with ``dtype='string'`` would fail (:issue:`27953`, :issue:`33623`)
 - Bug where :class:`DataFrame` column set to scalar extension type was considered an object type rather than the extension type (:issue:`34832`)
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index cb1144c18e49c..412ccedfafda5 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -196,7 +196,8 @@ def _validate(self):
     @classmethod
     def _from_sequence(cls, scalars, dtype=None, copy=False):
         if dtype:
-            assert dtype == "string"
+            # StringDtype subclasses are valid; keep accepting the "string" alias too
+            assert isinstance(dtype, StringDtype) or dtype == "string"
 
         # convert non-na-likes to str, and nan-likes to StringDtype.na_value
         result = lib.ensure_string_array(
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 56a8e21edd004..1f3da1758bee5 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -1,10 +1,14 @@
 import operator
+from typing import Type
 
 import numpy as np
 import pytest
 
 import pandas.util._test_decorators as td
 
+from pandas.core.dtypes.base import register_extension_dtype
+from pandas.core.dtypes.common import is_string_dtype
+
 import pandas as pd
 import pandas._testing as tm
 
@@ -338,6 +342,30 @@ def test_memory_usage():
     assert 0 < series.nbytes <= series.memory_usage() < series.memory_usage(deep=True)
 
 
+def test_string_dtype_subclassing():
+    # GH 34309: a registered StringDtype subclass must be usable as a Series dtype
+    @register_extension_dtype
+    class MyExtensionDtype(pd.StringDtype):
+        name = "my_extension"
+
+        def __repr__(self) -> str:
+            return "MyExtensionDtype"
+
+        @classmethod
+        def construct_array_type(cls) -> "Type[MyExtensionStringArray]":
+            return MyExtensionStringArray
+
+    class MyExtensionStringArray(pd.core.arrays.StringArray):
+        def __init__(self, values, copy=False):
+            super().__init__(values, copy)
+            self._dtype = MyExtensionDtype()
+
+    series = pd.Series(["test", "test2"], dtype="my_extension")
+    assert series.dtype == "my_extension"
+    assert series.tolist() == ["test", "test2"]
+    assert is_string_dtype(series)
+
+
 @pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
 def test_astype_from_float_dtype(dtype):
     # https://github.com/pandas-dev/pandas/issues/36451