Skip to content

Commit 450a1f0

Browse files
authored
BUG: ArrowDtype.construct_from_string round-trip (#50689)
* BUG: ArrowDtype.construct_from_string round-trip * suggestions
1 parent 0127d06 commit 450a1f0

File tree

3 files changed

+66
-81
lines changed

3 files changed

+66
-81
lines changed

pandas/core/arrays/arrow/dtype.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,13 @@ def construct_from_string(cls, string: str) -> ArrowDtype:
203203
except ValueError as err:
204204
has_parameters = re.search(r"\[.*\]", base_type)
205205
if has_parameters:
206+
# Fallback to try common temporal types
207+
try:
208+
return cls._parse_temporal_dtype_string(base_type)
209+
except (NotImplementedError, ValueError):
210+
# Fall through to raise with nice exception message below
211+
pass
212+
206213
raise NotImplementedError(
207214
"Passing pyarrow type specific parameters "
208215
f"({has_parameters.group()}) in the string is not supported. "
@@ -212,6 +219,35 @@ def construct_from_string(cls, string: str) -> ArrowDtype:
212219
raise TypeError(f"'{base_type}' is not a valid pyarrow data type.") from err
213220
return cls(pa_dtype)
214221

222+
# TODO(arrow#33642): This can be removed once supported by pyarrow
223+
@classmethod
def _parse_temporal_dtype_string(cls, string: str) -> ArrowDtype:
    """
    Construct a temporal ArrowDtype from string.

    Fallback used by ``construct_from_string`` for parameterized strings
    such as ``"timestamp[s, tz=UTC]"``. Only ``timestamp`` with a timezone
    component is handled here.

    Parameters
    ----------
    string : str
        Type string with the trailing "[pyarrow]" already stripped and
        known to contain "[".

    Returns
    -------
    ArrowDtype

    Raises
    ------
    ValueError
        If the string does not end with "]", or if a "timestamp" string has
        no "," separating the unit from the timezone.
    NotImplementedError
        If the type name before "[" is anything other than "timestamp".
    """
    # we assume
    #  1) "[pyarrow]" has already been stripped from the end of our string.
    #  2) we know "[" is present
    head, tail = string.split("[", 1)

    if not tail.endswith("]"):
        raise ValueError(f"'{string}' does not end with ']'")
    tail = tail[:-1]

    if head == "timestamp":
        unit, sep, tz = tail.partition(",")
        if not sep:
            # Without a timezone part type_for_alias should already have
            # worked, so the unit itself must be invalid. Raise ValueError
            # (not AssertionError) so that construct_from_string catches it
            # and reports its own "not supported" message; this also keeps
            # the behaviour identical under ``python -O``.
            raise ValueError(
                f"'{string}' must be of the form 'timestamp[unit, tz=zone]'"
            )
        unit = unit.strip()
        tz = tz.strip()
        if tz.startswith("tz="):
            tz = tz[3:]

        pa_type = pa.timestamp(unit, tz=tz)
        return cls(pa_type)

    raise NotImplementedError(string)
250+
215251
@property
216252
def _is_numeric(self) -> bool:
217253
"""

pandas/tests/extension/base/dtype.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,6 @@ def test_kind(self, dtype):
2020
valid = set("biufcmMOSUV")
2121
assert dtype.kind in valid
2222

23-
def test_construct_from_string_own_name(self, dtype):
24-
result = dtype.construct_from_string(dtype.name)
25-
assert type(result) is type(dtype)
26-
27-
# check OK as classmethod
28-
result = type(dtype).construct_from_string(dtype.name)
29-
assert type(result) is type(dtype)
30-
3123
def test_is_dtype_from_name(self, dtype):
3224
result = type(dtype).is_dtype(dtype.name)
3325
assert result is True
@@ -97,9 +89,13 @@ def test_eq(self, dtype):
9789
assert dtype == dtype.name
9890
assert dtype != "anonther_type"
9991

100-
def test_construct_from_string(self, dtype):
101-
dtype_instance = type(dtype).construct_from_string(dtype.name)
102-
assert isinstance(dtype_instance, type(dtype))
92+
def test_construct_from_string_own_name(self, dtype):
    """
    Round-trip ``dtype.name`` through ``construct_from_string``, called on
    both the instance and the class, and check the exact type of the result.
    """
    dtype_cls = type(dtype)

    # called on the instance
    from_instance = dtype.construct_from_string(dtype.name)
    assert type(from_instance) is dtype_cls

    # check OK as classmethod
    from_class = dtype_cls.construct_from_string(dtype.name)
    assert type(from_class) is dtype_cls
10399

104100
def test_construct_from_string_another_type_raises(self, dtype):
105101
msg = f"Cannot construct a '{type(dtype).__name__}' from 'another_type'"

pandas/tests/extension/test_arrow.py

Lines changed: 23 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -250,13 +250,9 @@ def test_astype_str(self, data, request):
250250
class TestConstructors(base.BaseConstructorsTests):
251251
def test_from_dtype(self, data, request):
252252
pa_dtype = data.dtype.pyarrow_dtype
253-
if (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz) or pa.types.is_string(
254-
pa_dtype
255-
):
256-
if pa.types.is_string(pa_dtype):
257-
reason = "ArrowDtype(pa.string()) != StringDtype('pyarrow')"
258-
else:
259-
reason = f"pyarrow.type_for_alias cannot infer {pa_dtype}"
253+
254+
if pa.types.is_string(pa_dtype):
255+
reason = "ArrowDtype(pa.string()) != StringDtype('pyarrow')"
260256
request.node.add_marker(
261257
pytest.mark.xfail(
262258
reason=reason,
@@ -604,65 +600,24 @@ def test_in_numeric_groupby(self, data_for_grouping):
604600
class TestBaseDtype(base.BaseDtypeTests):
605601
def test_construct_from_string_own_name(self, dtype, request):
606602
pa_dtype = dtype.pyarrow_dtype
607-
if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None:
608-
request.node.add_marker(
609-
pytest.mark.xfail(
610-
raises=NotImplementedError,
611-
reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}",
612-
)
613-
)
614-
elif pa.types.is_string(pa_dtype):
615-
request.node.add_marker(
616-
pytest.mark.xfail(
617-
raises=TypeError,
618-
reason=(
619-
"Still support StringDtype('pyarrow') "
620-
"over ArrowDtype(pa.string())"
621-
),
622-
)
623-
)
603+
604+
if pa.types.is_string(pa_dtype):
605+
# We still support StringDtype('pyarrow') over ArrowDtype(pa.string())
606+
msg = r"string\[pyarrow\] should be constructed by StringDtype"
607+
with pytest.raises(TypeError, match=msg):
608+
dtype.construct_from_string(dtype.name)
609+
610+
return
611+
624612
super().test_construct_from_string_own_name(dtype)
625613

626614
def test_is_dtype_from_name(self, dtype, request):
627615
pa_dtype = dtype.pyarrow_dtype
628-
if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None:
629-
request.node.add_marker(
630-
pytest.mark.xfail(
631-
raises=NotImplementedError,
632-
reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}",
633-
)
634-
)
635-
elif pa.types.is_string(pa_dtype):
636-
request.node.add_marker(
637-
pytest.mark.xfail(
638-
reason=(
639-
"Still support StringDtype('pyarrow') "
640-
"over ArrowDtype(pa.string())"
641-
),
642-
)
643-
)
644-
super().test_is_dtype_from_name(dtype)
645-
646-
def test_construct_from_string(self, dtype, request):
647-
pa_dtype = dtype.pyarrow_dtype
648-
if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None:
649-
request.node.add_marker(
650-
pytest.mark.xfail(
651-
raises=NotImplementedError,
652-
reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}",
653-
)
654-
)
655-
elif pa.types.is_string(pa_dtype):
656-
request.node.add_marker(
657-
pytest.mark.xfail(
658-
raises=TypeError,
659-
reason=(
660-
"Still support StringDtype('pyarrow') "
661-
"over ArrowDtype(pa.string())"
662-
),
663-
)
664-
)
665-
super().test_construct_from_string(dtype)
616+
if pa.types.is_string(pa_dtype):
617+
# We still support StringDtype('pyarrow') over ArrowDtype(pa.string())
618+
assert not type(dtype).is_dtype(dtype.name)
619+
else:
620+
super().test_is_dtype_from_name(dtype)
666621

667622
def test_construct_from_string_another_type_raises(self, dtype):
668623
msg = r"'another_type' must end with '\[pyarrow\]'"
@@ -753,13 +708,6 @@ def test_EA_types(self, engine, data, request):
753708
request.node.add_marker(
754709
pytest.mark.xfail(raises=TypeError, reason="GH 47534")
755710
)
756-
elif pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None:
757-
request.node.add_marker(
758-
pytest.mark.xfail(
759-
raises=NotImplementedError,
760-
reason=f"Parameterized types with tz={pa_dtype.tz} not supported.",
761-
)
762-
)
763711
elif pa.types.is_timestamp(pa_dtype) and pa_dtype.unit in ("us", "ns"):
764712
request.node.add_marker(
765713
pytest.mark.xfail(
@@ -1266,7 +1214,12 @@ def test_invalid_other_comp(self, data, comparison_op):
12661214

12671215
def test_arrowdtype_construct_from_string_type_with_unsupported_parameters():
12681216
with pytest.raises(NotImplementedError, match="Passing pyarrow type"):
1269-
ArrowDtype.construct_from_string("timestamp[s, tz=UTC][pyarrow]")
1217+
ArrowDtype.construct_from_string("not_a_real_dype[s, tz=UTC][pyarrow]")
1218+
1219+
# but as of GH#50689, timestamptz is supported
1220+
dtype = ArrowDtype.construct_from_string("timestamp[s, tz=UTC][pyarrow]")
1221+
expected = ArrowDtype(pa.timestamp("s", "UTC"))
1222+
assert dtype == expected
12701223

12711224

12721225
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)