diff --git a/pandas/core/arrays/arrow/dtype.py b/pandas/core/arrays/arrow/dtype.py index 331e66698cc35..fdb9ac877831b 100644 --- a/pandas/core/arrays/arrow/dtype.py +++ b/pandas/core/arrays/arrow/dtype.py @@ -203,6 +203,13 @@ def construct_from_string(cls, string: str) -> ArrowDtype: except ValueError as err: has_parameters = re.search(r"\[.*\]", base_type) if has_parameters: + # Fallback to try common temporal types + try: + return cls._parse_temporal_dtype_string(base_type) + except (NotImplementedError, ValueError): + # Fall through to raise with nice exception message below + pass + raise NotImplementedError( "Passing pyarrow type specific parameters " f"({has_parameters.group()}) in the string is not supported. " @@ -212,6 +219,35 @@ def construct_from_string(cls, string: str) -> ArrowDtype: raise TypeError(f"'{base_type}' is not a valid pyarrow data type.") from err return cls(pa_dtype) + # TODO(arrow#33642): This can be removed once supported by pyarrow + @classmethod + def _parse_temporal_dtype_string(cls, string: str) -> ArrowDtype: + """ + Construct a temporal ArrowDtype from string. + """ + # we assume + # 1) "[pyarrow]" has already been stripped from the end of our string. + # 2) we know "[" is present + head, tail = string.split("[", 1) + + if not tail.endswith("]"): + raise ValueError + tail = tail[:-1] + + if head == "timestamp": + assert "," in tail # otherwise type_for_alias should work + unit, tz = tail.split(",", 1) + unit = unit.strip() + tz = tz.strip() + if tz.startswith("tz="): + tz = tz[3:] + + pa_type = pa.timestamp(unit, tz=tz) + dtype = cls(pa_type) + return dtype + + raise NotImplementedError(string) + @property def _is_numeric(self) -> bool: """ diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index 2635343d73fd7..392a75f8a69a7 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -20,14 +20,6 @@ def test_kind(self, dtype): valid = set("biufcmMOSUV") assert dtype.kind in valid - def test_construct_from_string_own_name(self, dtype): - result = dtype.construct_from_string(dtype.name) - assert type(result) is type(dtype) - - # check OK as classmethod - result = type(dtype).construct_from_string(dtype.name) - assert type(result) is type(dtype) - def test_is_dtype_from_name(self, dtype): result = type(dtype).is_dtype(dtype.name) assert result is True @@ -97,9 +89,13 @@ def test_eq(self, dtype): assert dtype == dtype.name assert dtype != "anonther_type" - def test_construct_from_string(self, dtype): - dtype_instance = type(dtype).construct_from_string(dtype.name) - assert isinstance(dtype_instance, type(dtype)) + def test_construct_from_string_own_name(self, dtype): + result = dtype.construct_from_string(dtype.name) + assert type(result) is type(dtype) + + # check OK as classmethod + result = type(dtype).construct_from_string(dtype.name) + assert type(result) is type(dtype) def test_construct_from_string_another_type_raises(self, dtype): msg = f"Cannot construct a '{type(dtype).__name__}' from 'another_type'" diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 2140a2e71eda9..c53b6e6658979 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -250,13 +250,9 @@ def test_astype_str(self, data, request): class TestConstructors(base.BaseConstructorsTests): def test_from_dtype(self, data, request): pa_dtype = data.dtype.pyarrow_dtype - if (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz) or pa.types.is_string( - pa_dtype - ): - if pa.types.is_string(pa_dtype): - reason = "ArrowDtype(pa.string()) != StringDtype('pyarrow')" - else: - reason = f"pyarrow.type_for_alias cannot infer {pa_dtype}" + + if pa.types.is_string(pa_dtype): + reason = "ArrowDtype(pa.string()) != StringDtype('pyarrow')" request.node.add_marker( pytest.mark.xfail( reason=reason, @@ -604,65 +600,24 @@ def test_in_numeric_groupby(self, data_for_grouping): class TestBaseDtype(base.BaseDtypeTests): def test_construct_from_string_own_name(self, dtype, request): pa_dtype = dtype.pyarrow_dtype - if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None: - request.node.add_marker( - pytest.mark.xfail( - raises=NotImplementedError, - reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}", - ) - ) - elif pa.types.is_string(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - raises=TypeError, - reason=( - "Still support StringDtype('pyarrow') " - "over ArrowDtype(pa.string())" - ), - ) - ) + + if pa.types.is_string(pa_dtype): + # We still support StringDtype('pyarrow') over ArrowDtype(pa.string()) + msg = r"string\[pyarrow\] should be constructed by StringDtype" + with pytest.raises(TypeError, match=msg): + dtype.construct_from_string(dtype.name) + + return + super().test_construct_from_string_own_name(dtype) def test_is_dtype_from_name(self, dtype, request): pa_dtype = dtype.pyarrow_dtype - if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None: - request.node.add_marker( - pytest.mark.xfail( - raises=NotImplementedError, - reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}", - ) - ) - elif pa.types.is_string(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason=( - "Still support StringDtype('pyarrow') " - "over ArrowDtype(pa.string())" - ), - ) - ) - super().test_is_dtype_from_name(dtype) - - def test_construct_from_string(self, dtype, request): - pa_dtype = dtype.pyarrow_dtype - if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None: - request.node.add_marker( - pytest.mark.xfail( - raises=NotImplementedError, - reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}", - ) - ) - elif pa.types.is_string(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - raises=TypeError, - reason=( - "Still support StringDtype('pyarrow') " - "over ArrowDtype(pa.string())" - ), - ) - ) - super().test_construct_from_string(dtype) + if pa.types.is_string(pa_dtype): + # We still support StringDtype('pyarrow') over ArrowDtype(pa.string()) + assert not type(dtype).is_dtype(dtype.name) + else: + super().test_is_dtype_from_name(dtype) def test_construct_from_string_another_type_raises(self, dtype): msg = r"'another_type' must end with '\[pyarrow\]'" @@ -753,13 +708,6 @@ def test_EA_types(self, engine, data, request): request.node.add_marker( pytest.mark.xfail(raises=TypeError, reason="GH 47534") ) - elif pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None: - request.node.add_marker( - pytest.mark.xfail( - raises=NotImplementedError, - reason=f"Parameterized types with tz={pa_dtype.tz} not supported.", - ) - ) elif pa.types.is_timestamp(pa_dtype) and pa_dtype.unit in ("us", "ns"): request.node.add_marker( pytest.mark.xfail( @@ -1266,7 +1214,12 @@ def test_invalid_other_comp(self, data, comparison_op): def test_arrowdtype_construct_from_string_type_with_unsupported_parameters(): with pytest.raises(NotImplementedError, match="Passing pyarrow type"): - ArrowDtype.construct_from_string("timestamp[s, tz=UTC][pyarrow]") + ArrowDtype.construct_from_string("not_a_real_dype[s, tz=UTC][pyarrow]") + + # but as of GH#50689, timestamptz is supported + dtype = ArrowDtype.construct_from_string("timestamp[s, tz=UTC][pyarrow]") + expected = ArrowDtype(pa.timestamp("s", "UTC")) + assert dtype == expected @pytest.mark.parametrize(