Skip to content

Commit 4253358

Browse files
authored
fix: raise ValueError if date is out-of-bounds (#46)
* fix: raise ValueError if date is out-of-bounds
* unify _datetime return type
* add relevant unit test
1 parent 42109ed commit 4253358

File tree

4 files changed

+56
-28
lines changed

4 files changed

+56
-28
lines changed

db_dtypes/__init__.py

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
import datetime
1919
import re
20-
from typing import Union
20+
from typing import Optional, Union
2121

2222
import numpy
2323
import packaging.version
@@ -103,7 +103,7 @@ def _datetime(
103103
r"(?::(?P<seconds>\d+)"
104104
r"(?:\.(?P<fraction>\d*))?)?)?\s*$"
105105
).match,
106-
):
106+
) -> Optional[numpy.datetime64]:
107107
# Convert pyarrow values to datetime.time.
108108
if isinstance(scalar, (pyarrow.Time32Scalar, pyarrow.Time64Scalar)):
109109
scalar = (
@@ -115,8 +115,16 @@ def _datetime(
115115

116116
if scalar is None:
117117
return None
118-
elif isinstance(scalar, datetime.time):
119-
return datetime.datetime.combine(_EPOCH, scalar)
118+
if isinstance(scalar, datetime.time):
119+
return pandas.Timestamp(
120+
year=1970,
121+
month=1,
122+
day=1,
123+
hour=scalar.hour,
124+
minute=scalar.minute,
125+
second=scalar.second,
126+
microsecond=scalar.microsecond,
127+
).to_datetime64()
120128
elif isinstance(scalar, pandas.Timestamp):
121129
return scalar.to_datetime64()
122130
elif isinstance(scalar, str):
@@ -125,20 +133,20 @@ def _datetime(
125133
if not parsed:
126134
raise ValueError(f"Bad time string: {repr(scalar)}")
127135

128-
hours = parsed.group("hours")
129-
minutes = parsed.group("minutes")
130-
seconds = parsed.group("seconds")
136+
hour = parsed.group("hours")
137+
minute = parsed.group("minutes")
138+
second = parsed.group("seconds")
131139
fraction = parsed.group("fraction")
132-
microseconds = int(fraction.ljust(6, "0")[:6]) if fraction else 0
133-
return datetime.datetime(
134-
1970,
135-
1,
136-
1,
137-
int(hours),
138-
int(minutes) if minutes else 0,
139-
int(seconds) if seconds else 0,
140-
microseconds,
141-
)
140+
nanosecond = int(fraction.ljust(9, "0")[:9]) if fraction else 0
141+
return pandas.Timestamp(
142+
year=1970,
143+
month=1,
144+
day=1,
145+
hour=int(hour),
146+
minute=int(minute) if minute else 0,
147+
second=int(second) if second else 0,
148+
nanosecond=nanosecond,
149+
).to_datetime64()
142150
else:
143151
raise TypeError("Invalid value type", scalar)
144152

@@ -225,23 +233,25 @@ class DateArray(core.BaseDatetimeArray):
225233
def _datetime(
226234
scalar,
227235
match_fn=re.compile(r"\s*(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+)\s*$").match,
228-
):
236+
) -> Optional[numpy.datetime64]:
229237
# Convert pyarrow values to datetime.date.
230238
if isinstance(scalar, (pyarrow.Date32Scalar, pyarrow.Date64Scalar)):
231239
scalar = scalar.as_py()
232240

233241
if scalar is None:
234242
return None
235243
elif isinstance(scalar, datetime.date):
236-
return datetime.datetime(scalar.year, scalar.month, scalar.day)
244+
return pandas.Timestamp(
245+
year=scalar.year, month=scalar.month, day=scalar.day
246+
).to_datetime64()
237247
elif isinstance(scalar, str):
238248
match = match_fn(scalar)
239249
if not match:
240250
raise ValueError(f"Bad date string: {repr(scalar)}")
241251
year = int(match.group("year"))
242252
month = int(match.group("month"))
243253
day = int(match.group("day"))
244-
return datetime.datetime(year, month, day)
254+
return pandas.Timestamp(year=year, month=month, day=day).to_datetime64()
245255
else:
246256
raise TypeError("Invalid value type", scalar)
247257

db_dtypes/core.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,7 @@ def take(
127127
if allow_fill:
128128
fill_value = self._validate_scalar(fill_value)
129129
fill_value = (
130-
numpy.datetime64()
131-
if fill_value is None
132-
else numpy.datetime64(self._datetime(fill_value))
130+
numpy.datetime64() if fill_value is None else self._datetime(fill_value)
133131
)
134132
if (indices < -1).any():
135133
raise ValueError(

tests/unit/test_arrow.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -183,13 +183,13 @@ def types_mapper(
183183
type=pyarrow.time64("us"),
184184
),
185185
),
186-
(
186+
# Only microseconds are supported when reading data. See:
187+
# https://github.com/googleapis/python-db-dtypes-pandas/issues/19
188+
# Still, round-trip with pyarrow nanosecond precision scalars
189+
# is supported.
190+
pytest.param(
187191
pandas.Series(
188192
[
189-
# Only microseconds are supported when reading data. See:
190-
# https://github.com/googleapis/python-db-dtypes-pandas/issues/19
191-
# Still, round-trip with pyarrow nanosecond precision scalars
192-
# is supported.
193193
pyarrow.scalar(0, pyarrow.time64("ns")),
194194
pyarrow.scalar(
195195
12 * HOUR_NANOS
@@ -216,6 +216,21 @@ def types_mapper(
216216
],
217217
type=pyarrow.time64("ns"),
218218
),
219+
id="time-nanoseconds-arrow-round-trip",
220+
),
221+
pytest.param(
222+
pandas.Series(
223+
["0:0:0", "12:30:15.123456789", "23:59:59.999999999"], dtype="dbtime",
224+
),
225+
pyarrow.array(
226+
[
227+
0,
228+
12 * HOUR_NANOS + 30 * MINUTE_NANOS + 15 * SECOND_NANOS + 123_456_789,
229+
23 * HOUR_NANOS + 59 * MINUTE_NANOS + 59 * SECOND_NANOS + 999_999_999,
230+
],
231+
type=pyarrow.time64("ns"),
232+
),
233+
id="time-nanoseconds-arrow-from-string",
219234
),
220235
]
221236

tests/unit/test_date.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@ def test_date_parsing(value, expected):
5555
("2021-2-99", "day is out of range for month"),
5656
("2021-99-1", "month must be in 1[.][.]12"),
5757
("10000-1-1", "year 10000 is out of range"),
58+
# Outside of the min/max values supported by pandas.Timestamp.
59+
("0001-01-01", "Out of bounds"),
60+
("9999-12-31", "Out of bounds"),
61+
("1677-09-21", "Out of bounds"),
62+
("2262-04-12", "Out of bounds"),
5863
],
5964
)
6065
def test_date_parsing_errors(value, error):

0 commit comments

Comments
 (0)