Skip to content

Commit 7675b15

Browse files
authored
fix: support converting empty time Series to pyarrow Array (#11)
* fix: support converting empty `time` Series to pyarrow Array
* use object dtype for time numpy array
* backport to_numpy
* remove redundant test
1 parent c65ff18 commit 7675b15

File tree

3 files changed: 165 additions and 12 deletions.

db_dtypes/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -125,7 +125,8 @@ def to_numpy(self, dtype="object"):
125125

126126
def __arrow_array__(self, type=None):
127127
return pyarrow.array(
128-
self.to_numpy(), type=type if type is not None else pyarrow.time64("ns"),
128+
self.to_numpy(dtype="object"),
129+
type=type if type is not None else pyarrow.time64("ns"),
129130
)
130131

131132

tests/unit/test_arrow.py

Lines changed: 163 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,163 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import datetime as dt
16+
17+
import pandas
18+
import pyarrow
19+
import pytest
20+
21+
# To register the types.
22+
import db_dtypes # noqa
23+
24+
25+
@pytest.mark.parametrize(
26+
("series", "expected"),
27+
(
28+
(pandas.Series([], dtype="date"), pyarrow.array([], type=pyarrow.date32())),
29+
(
30+
pandas.Series([None, None, None], dtype="date"),
31+
pyarrow.array([None, None, None], type=pyarrow.date32()),
32+
),
33+
(
34+
pandas.Series(
35+
[dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="date"
36+
),
37+
pyarrow.array(
38+
[dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)],
39+
type=pyarrow.date32(),
40+
),
41+
),
42+
(
43+
pandas.Series(
44+
[dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)],
45+
dtype="date",
46+
),
47+
pyarrow.array(
48+
[dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)],
49+
type=pyarrow.date32(),
50+
),
51+
),
52+
(pandas.Series([], dtype="time"), pyarrow.array([], type=pyarrow.time64("ns"))),
53+
(
54+
pandas.Series([None, None, None], dtype="time"),
55+
pyarrow.array([None, None, None], type=pyarrow.time64("ns")),
56+
),
57+
(
58+
pandas.Series(
59+
[dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], dtype="time"
60+
),
61+
pyarrow.array(
62+
[dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)],
63+
type=pyarrow.time64("ns"),
64+
),
65+
),
66+
(
67+
pandas.Series(
68+
[
69+
dt.time(0, 0, 0, 0),
70+
dt.time(12, 30, 15, 125_000),
71+
dt.time(23, 59, 59, 999_999),
72+
],
73+
dtype="time",
74+
),
75+
pyarrow.array(
76+
[
77+
dt.time(0, 0, 0, 0),
78+
dt.time(12, 30, 15, 125_000),
79+
dt.time(23, 59, 59, 999_999),
80+
],
81+
type=pyarrow.time64("ns"),
82+
),
83+
),
84+
),
85+
)
86+
def test_to_arrow(series, expected):
87+
array = pyarrow.array(series)
88+
assert array.equals(expected)
89+
90+
91+
@pytest.mark.parametrize(
92+
("series", "expected"),
93+
(
94+
(pandas.Series([], dtype="date"), pyarrow.array([], type=pyarrow.date64())),
95+
(
96+
pandas.Series([None, None, None], dtype="date"),
97+
pyarrow.array([None, None, None], type=pyarrow.date64()),
98+
),
99+
(
100+
pandas.Series(
101+
[dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)], dtype="date"
102+
),
103+
pyarrow.array(
104+
[dt.date(2021, 9, 27), None, dt.date(2011, 9, 27)],
105+
type=pyarrow.date64(),
106+
),
107+
),
108+
(
109+
pandas.Series(
110+
[dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)],
111+
dtype="date",
112+
),
113+
pyarrow.array(
114+
[dt.date(1677, 9, 22), dt.date(1970, 1, 1), dt.date(2262, 4, 11)],
115+
type=pyarrow.date64(),
116+
),
117+
),
118+
(pandas.Series([], dtype="time"), pyarrow.array([], type=pyarrow.time32("ms"))),
119+
(
120+
pandas.Series([None, None, None], dtype="time"),
121+
pyarrow.array([None, None, None], type=pyarrow.time32("ms")),
122+
),
123+
(
124+
pandas.Series(
125+
[dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_000)], dtype="time"
126+
),
127+
pyarrow.array(
128+
[dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_000)],
129+
type=pyarrow.time32("ms"),
130+
),
131+
),
132+
(
133+
pandas.Series(
134+
[dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)], dtype="time"
135+
),
136+
pyarrow.array(
137+
[dt.time(0, 0, 0, 0), None, dt.time(23, 59, 59, 999_999)],
138+
type=pyarrow.time64("us"),
139+
),
140+
),
141+
(
142+
pandas.Series(
143+
[
144+
dt.time(0, 0, 0, 0),
145+
dt.time(12, 30, 15, 125_000),
146+
dt.time(23, 59, 59, 999_999),
147+
],
148+
dtype="time",
149+
),
150+
pyarrow.array(
151+
[
152+
dt.time(0, 0, 0, 0),
153+
dt.time(12, 30, 15, 125_000),
154+
dt.time(23, 59, 59, 999_999),
155+
],
156+
type=pyarrow.time64("us"),
157+
),
158+
),
159+
),
160+
)
161+
def test_to_arrow_w_arrow_type(series, expected):
162+
array = pyarrow.array(series, type=expected.type)
163+
assert array.equals(expected)

tests/unit/test_dtypes.py

Lines changed: 0 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@
1515
import datetime
1616

1717
import packaging.version
18-
import pyarrow.lib
1918
import pytest
2019

2120
pd = pytest.importorskip("pandas")
@@ -670,13 +669,3 @@ def test_bad_time_parsing(value, error):
670669
def test_bad_date_parsing(value, error):
671670
with pytest.raises(ValueError, match=error):
672671
_cls("date")([value])
673-
674-
675-
@for_date_and_time
676-
def test_date___arrow__array__(dtype):
677-
a = _make_one(dtype)
678-
ar = a.__arrow_array__()
679-
assert isinstance(
680-
ar, pyarrow.Date32Array if dtype == "date" else pyarrow.Time64Array,
681-
)
682-
assert [v.as_py() for v in ar] == list(a)

0 commit comments

Comments
 (0)