Skip to content

BUG: pd.DateOffset handle milliseconds #50020

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Dec 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
48d9dfa
BUG: fix milliseconds and add test
markopacak Dec 1, 2022
65cd5da
BUG: move condition-check outside main if-block
markopacak Dec 2, 2022
f989674
DOC: update whatsnew
markopacak Dec 3, 2022
b3a5ef1
DOC: update whatsnew
markopacak Dec 3, 2022
37427dd
Merge branch 'main' into bug-dateoffset-milliseconds
markopacak Dec 4, 2022
705d2d6
CLN: reduce docstring
markopacak Dec 4, 2022
c1be7c6
CLN: remove return statement
markopacak Dec 4, 2022
1788491
Merge branch 'main' into bug-dateoffset-milliseconds
markopacak Dec 4, 2022
ae0b529
Merge branch 'pandas-dev:main' into bug-dateoffset-milliseconds
markopacak Dec 8, 2022
64c9ee3
TST: update assertions
markopacak Dec 8, 2022
65015c8
CLN: rework _determine_offset and add tests
markopacak Dec 8, 2022
ef7f118
CLN: def to cdef
markopacak Dec 8, 2022
b2b7473
BUG: GH49897 raise for invalid arguments + test
markopacak Dec 15, 2022
c2cf081
TST: GH 49897 remove useless test
markopacak Dec 19, 2022
3981b04
TST: BUG49897 convert millis to micros so relativedelta can use them
markopacak Dec 19, 2022
d80e780
Merge branch 'main' into bug-dateoffset-milliseconds
markopacak Dec 20, 2022
27109a7
TST: GH49897 test millis + micros
markopacak Dec 22, 2022
76fafe2
Merge remote-tracking branch 'upstream/main' into bug-dateoffset-mill…
markopacak Dec 22, 2022
2ad8c60
CLN: GH49897 remove comment
markopacak Dec 22, 2022
2f913a8
Merge branch 'bug-dateoffset-milliseconds' of github.com:markopacak/p…
markopacak Dec 22, 2022
8eb83d0
DOC: GH49897 update rst
markopacak Dec 22, 2022
309e296
CLN: re-order if-conditions
markopacak Dec 22, 2022
0d85a74
TST: GH49897 test raise
markopacak Dec 22, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,7 @@ Datetimelike
- Bug in :class:`Timestamp` was showing ``UserWarning``, which was not actionable by users, when parsing non-ISO8601 delimited date strings (:issue:`50232`)
- Bug in :func:`to_datetime` was showing misleading ``ValueError`` when parsing dates with format containing ISO week directive and ISO weekday directive (:issue:`50308`)
- Bug in :func:`to_datetime` was not raising ``ValueError`` when invalid format was passed and ``errors`` was ``'ignore'`` or ``'coerce'`` (:issue:`50266`)
- Bug in :class:`DateOffset` was throwing ``TypeError`` when constructing with milliseconds and another super-daily argument (:issue:`49897`)
-

Timedelta
Expand Down
84 changes: 47 additions & 37 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -298,43 +298,54 @@ _relativedelta_kwds = {"years", "months", "weeks", "days", "year", "month",


cdef _determine_offset(kwds):
# timedelta is used for sub-daily plural offsets and all singular
# offsets, relativedelta is used for plural offsets of daily length or
# more, nanosecond(s) are handled by apply_wraps
kwds_no_nanos = dict(
(k, v) for k, v in kwds.items()
if k not in ("nanosecond", "nanoseconds")
)
# TODO: Are nanosecond and nanoseconds allowed somewhere?

_kwds_use_relativedelta = ("years", "months", "weeks", "days",
"year", "month", "week", "day", "weekday",
"hour", "minute", "second", "microsecond",
"millisecond")

use_relativedelta = False
if len(kwds_no_nanos) > 0:
if any(k in _kwds_use_relativedelta for k in kwds_no_nanos):
if "millisecond" in kwds_no_nanos:
raise NotImplementedError(
"Using DateOffset to replace `millisecond` component in "
"datetime object is not supported. Use "
"`microsecond=timestamp.microsecond % 1000 + ms * 1000` "
"instead."
)
offset = relativedelta(**kwds_no_nanos)
use_relativedelta = True
else:
# sub-daily offset - use timedelta (tz-aware)
offset = timedelta(**kwds_no_nanos)
elif any(nano in kwds for nano in ("nanosecond", "nanoseconds")):
offset = timedelta(days=0)
else:
# GH 45643/45890: (historically) defaults to 1 day for non-nano
# since datetime.timedelta doesn't handle nanoseconds
offset = timedelta(days=1)
return offset, use_relativedelta
if not kwds:
# GH 45643/45890: (historically) defaults to 1 day
return timedelta(days=1), False

if "millisecond" in kwds:
raise NotImplementedError(
"Using DateOffset to replace `millisecond` component in "
"datetime object is not supported. Use "
"`microsecond=timestamp.microsecond % 1000 + ms * 1000` "
"instead."
)

nanos = {"nanosecond", "nanoseconds"}

# nanos are handled by apply_wraps
if all(k in nanos for k in kwds):
return timedelta(days=0), False

kwds_no_nanos = {k: v for k, v in kwds.items() if k not in nanos}

kwds_use_relativedelta = {
"year", "month", "day", "hour", "minute",
"second", "microsecond", "weekday", "years", "months", "weeks", "days",
"hours", "minutes", "seconds", "microseconds"
}

# "weeks" and "days" are left out despite being valid args for timedelta,
# because (historically) timedelta is used only for sub-daily.
kwds_use_timedelta = {
"seconds", "microseconds", "milliseconds", "minutes", "hours",
}

if all(k in kwds_use_timedelta for k in kwds_no_nanos):
# Sub-daily offset - use timedelta (tz-aware)
# This also handles "milliseconds" (plur): see GH 49897
return timedelta(**kwds_no_nanos), False

# convert milliseconds to microseconds, so relativedelta can parse it
if "milliseconds" in kwds_no_nanos:
micro = kwds_no_nanos.pop("milliseconds") * 1000
kwds_no_nanos["microseconds"] = kwds_no_nanos.get("microseconds", 0) + micro

if all(k in kwds_use_relativedelta for k in kwds_no_nanos):
return relativedelta(**kwds_no_nanos), True

raise ValueError(
f"Invalid argument/s or bad combination of arguments: {list(kwds.keys())}"
)

Comment on lines +346 to 349
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a test that hits this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, ever since minutes was added to kwds_use_relativedelta, and milliseconds is converted to microseconds, the only case where you raise is for invalid args

So it would be something like:

def test_offset_invalid_arguments(self):
    with pytest.raises(ValueError, match="^Invalid argument/s or bad combination of arguments"):
        DateOffset(picoseconds=1)  # or any other invalid arg

Does it make sense?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah looks good - if we're adding code, let's make sure it's covered

# ---------------------------------------------------------------------
# Mixins & Singletons
Expand Down Expand Up @@ -1163,7 +1174,6 @@ cdef class RelativeDeltaOffset(BaseOffset):

def __init__(self, n=1, normalize=False, **kwds):
BaseOffset.__init__(self, n, normalize)

off, use_rd = _determine_offset(kwds)
object.__setattr__(self, "_offset", off)
object.__setattr__(self, "_use_relativedelta", use_rd)
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/tseries/offsets/test_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,33 @@ def test_eq(self):

assert DateOffset(milliseconds=3) != DateOffset(milliseconds=7)

@pytest.mark.parametrize(
"offset_kwargs, expected_arg",
[
({"microseconds": 1, "milliseconds": 1}, "2022-01-01 00:00:00.001001"),
({"seconds": 1, "milliseconds": 1}, "2022-01-01 00:00:01.001"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should there be a test to check when both milliseconds and microseconds were passed, seeing as you sum them together?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(CI failures are unrelated BTW, if you fetch and merge upstream/main they should be fixed)

({"minutes": 1, "milliseconds": 1}, "2022-01-01 00:01:00.001"),
({"hours": 1, "milliseconds": 1}, "2022-01-01 01:00:00.001"),
({"days": 1, "milliseconds": 1}, "2022-01-02 00:00:00.001"),
({"weeks": 1, "milliseconds": 1}, "2022-01-08 00:00:00.001"),
({"months": 1, "milliseconds": 1}, "2022-02-01 00:00:00.001"),
({"years": 1, "milliseconds": 1}, "2023-01-01 00:00:00.001"),
],
)
def test_milliseconds_combination(self, offset_kwargs, expected_arg):
# GH 49897
offset = DateOffset(**offset_kwargs)
ts = Timestamp("2022-01-01")
result = ts + offset
expected = Timestamp(expected_arg)

assert result == expected

def test_offset_invalid_arguments(self):
msg = "^Invalid argument/s or bad combination of arguments"
with pytest.raises(ValueError, match=msg):
DateOffset(picoseconds=1)


class TestOffsetNames:
def test_get_offset_name(self):
Expand Down