From 61ff541807ca3f44a3c0c2f5c920c518ff5074f9 Mon Sep 17 00:00:00 2001 From: Thomas Dixon Date: Mon, 11 Nov 2024 09:17:25 -0500 Subject: [PATCH 1/4] Fix gh54763 bug in both datetime arrays and timestamps --- pandas/_libs/tslibs/timestamps.pyx | 34 ++++++++++++++++++------------ pandas/core/arrays/datetimes.py | 5 +++++ 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 1ab34da7ab53f..c76e99dee5ddf 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -3319,19 +3319,27 @@ default 'raise' if month <= 2: year -= 1 month += 12 - return (day + - np.fix((153 * month - 457) / 5) + - 365 * year + - np.floor(year / 4) - - np.floor(year / 100) + - np.floor(year / 400) + - 1721118.5 + - (self.hour + - self.minute / 60.0 + - self.second / 3600.0 + - self.microsecond / 3600.0 / 1e+6 + - self.nanosecond / 3600.0 / 1e+9 - ) / 24.0) + timezone_offset = 0.0 + if self.tzinfo is not None: + timezone_offset = self.utcoffset().total_seconds() / 86400.0 + return ( + day + + np.fix((153 * month - 457) / 5) + + 365 * year + + np.floor(year / 4) + - np.floor(year / 100) + + np.floor(year / 400) + + 1721118.5 + - timezone_offset + + ( + self.hour + + self.minute / 60.0 + + self.second / 3600.0 + + self.microsecond / 3600.0 / 1e+6 + + self.nanosecond / 3600.0 / 1e+9 + ) + / 24.0 + ) def isoweekday(self): """ diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 43cc492f82885..abb0b0765565a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2266,6 +2266,10 @@ def to_julian_date(self) -> npt.NDArray[np.float64]: testarr = month < 3 year[testarr] -= 1 month[testarr] += 12 + timezone_offsets = np.zeros(len(self), dtype=np.float64) + for i in range(len(self)): + if self[i].tzinfo is not None: + timezone_offsets[i] = self[i].utcoffset().total_seconds() / 86400.0 return ( day + np.fix((153 * month - 457) / 
5) @@ -2274,6 +2278,7 @@ def to_julian_date(self) -> npt.NDArray[np.float64]: - np.floor(year / 100) + np.floor(year / 400) + 1_721_118.5 + - timezone_offsets + ( self.hour + self.minute / 60 From 87ccf69ea469ee2fd33e20ab49e24b67db876a01 Mon Sep 17 00:00:00 2001 From: Thomas Dixon Date: Mon, 11 Nov 2024 11:05:27 -0500 Subject: [PATCH 2/4] Add regression tests for bug gh54763 --- .../datetimes/methods/test_to_julian_date.py | 49 +++++++++++++++++++ .../timestamp/methods/test_to_julian_date.py | 19 +++++++ 2 files changed, 68 insertions(+) diff --git a/pandas/tests/indexes/datetimes/methods/test_to_julian_date.py b/pandas/tests/indexes/datetimes/methods/test_to_julian_date.py index fc1f0595c21c5..90990e5556bf4 100644 --- a/pandas/tests/indexes/datetimes/methods/test_to_julian_date.py +++ b/pandas/tests/indexes/datetimes/methods/test_to_julian_date.py @@ -1,11 +1,14 @@ import numpy as np +from zoneinfo import ZoneInfo from pandas import ( Index, Timestamp, date_range, + to_datetime, ) import pandas._testing as tm +import pytest class TestDateTimeIndexToJulianDate: @@ -43,3 +46,49 @@ def test_second(self): r2 = dr.to_julian_date() assert isinstance(r2, Index) and r2.dtype == np.float64 tm.assert_index_equal(r1, r2) + + @pytest.mark.parametrize("tz, expected", [ + (None, 2400000.5), + (ZoneInfo("UTC"), 2400000.5), + (ZoneInfo("US/Pacific"), 2400000.5 + (8 / 24)), + (ZoneInfo("Europe/London"), 2400000.5 - (1 / 24)) + ]) + def test_to_julian_date_with_timezones_single_element(self, tz, expected): + # GH54763: DatetimeIndex.to_julian_date() must consider timezone + dates = to_datetime(['1858-11-17T00:00:00.0']) + if tz: + dates = dates.tz_localize(tz) + result = Index(dates.to_julian_date()) + expected = Index([expected]) + tm.assert_almost_equal(result, expected, rtol=1e-6, atol=1e-6) + + @pytest.mark.parametrize("tz, offset", [ + (None, 0), + (ZoneInfo("UTC"), 0), + (ZoneInfo("US/Pacific"), 8), + (ZoneInfo("Europe/London"), -1) + ]) + def 
test_to_julian_date_with_timezones_multiple_elements(self, tz, offset): + # GH54763: DatetimeIndex.to_julian_date() must consider timezone + dates = to_datetime( + [ + '1858-11-17T00:00:00', + '1858-11-17T12:00:00', + '2000-01-01T00:00:00', + '2000-01-01T12:00:00', + '2000-01-01T12:00:00' + ] + ) + if tz: + dates = dates.tz_localize(tz) + result = Index(dates.to_julian_date()) + expected = Index( + [ + 2400000.5 + (offset / 24), + 2400001.0 + (offset / 24), + 2451544.5 + (offset / 24), + 2451545.0 + (offset / 24), + 2451545.0 + (offset / 24) + ] + ) + tm.assert_almost_equal(result, expected, rtol=1e-6, atol=1e-6) diff --git a/pandas/tests/scalar/timestamp/methods/test_to_julian_date.py b/pandas/tests/scalar/timestamp/methods/test_to_julian_date.py index 7769614b601a4..1e11c3b3278cb 100644 --- a/pandas/tests/scalar/timestamp/methods/test_to_julian_date.py +++ b/pandas/tests/scalar/timestamp/methods/test_to_julian_date.py @@ -1,5 +1,10 @@ from pandas import Timestamp +import zoneinfo + +import pandas._testing as tm +import pytest + class TestTimestampToJulianDate: def test_compare_1700(self): @@ -26,3 +31,17 @@ def test_compare_hour13(self): ts = Timestamp("2000-08-12T13:00:00") res = ts.to_julian_date() assert res == 2_451_769.0416666666666666 + + @pytest.mark.parametrize("tz, expected", [ + (None, 2400000.5), + (zoneinfo.ZoneInfo("UTC"), 2400000.5), + (zoneinfo.ZoneInfo("US/Pacific"), 2400000.5 + (8 / 24)), + (zoneinfo.ZoneInfo("Europe/London"), 2400000.5 - (1 / 24)) + ]) + def test_to_julian_date_with_timezones(self, tz, expected): + # GH54763: Timestamp.to_julian_date() must consider timezone + ts = Timestamp('1858-11-17T00:00:00.0') + if tz: + ts = ts.tz_localize(tz) + result = ts.to_julian_date() + tm.assert_almost_equal(result, expected, rtol=1e-6, atol=1e-6) From f32c1847fb999352fd3810898379246c5979fe65 Mon Sep 17 00:00:00 2001 From: Thomas Dixon Date: Mon, 11 Nov 2024 11:13:33 -0500 Subject: [PATCH 3/4] Add whatsnew documentation for bug gh54763 --- 
doc/source/whatsnew/v3.0.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 89bc942cb7250..51376e56d4808 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -635,7 +635,8 @@ Timedelta Timezones ^^^^^^^^^ -- +- Bug in :meth:`Index.to_julian_date()` not accounting for timezone information in `DatetimeIndex`, leading to incorrect Julian date calculations when timezones are present (:issue:`54763`). +- Bug in :meth:`Timestamp.to_julian_date()` not accounting for timezone in `Timestamp` calculations, resulting in incorrect Julian dates when timezone-aware timestamps are used (:issue:`54763`). - Numeric From b152209e0b83535115f50aef4c39cee4001a6826 Mon Sep 17 00:00:00 2001 From: Thomas Dixon Date: Mon, 11 Nov 2024 11:27:15 -0500 Subject: [PATCH 4/4] Precommit check fixes --- doc/source/whatsnew/v3.0.0.rst | 4 +- pandas/_libs/tslibs/timestamps.pyx | 2 +- .../datetimes/methods/test_to_julian_date.py | 49 +++++++++++-------- .../timestamp/methods/test_to_julian_date.py | 23 +++++---- 4 files changed, 44 insertions(+), 34 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 51376e56d4808..d319d6c9cc49d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -635,8 +635,8 @@ Timedelta Timezones ^^^^^^^^^ -- Bug in :meth:`Index.to_julian_date()` not accounting for timezone information in `DatetimeIndex`, leading to incorrect Julian date calculations when timezones are present (:issue:`54763`). -- Bug in :meth:`Timestamp.to_julian_date()` not accounting for timezone in `Timestamp` calculations, resulting in incorrect Julian dates when timezone-aware timestamps are used (:issue:`54763`). +- Bug in :meth:`DatetimeIndex.to_julian_date` not accounting for timezone information in ``DatetimeIndex``, leading to incorrect Julian date calculations when timezones are present (:issue:`54763`). 
+- Bug in :meth:`Timestamp.to_julian_date` not accounting for timezone in ``Timestamp`` calculations, resulting in incorrect Julian dates when timezone-aware timestamps are used (:issue:`54763`). - Numeric diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index c76e99dee5ddf..658f239345917 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -3337,7 +3337,7 @@ default 'raise' + self.second / 3600.0 + self.microsecond / 3600.0 / 1e+6 + self.nanosecond / 3600.0 / 1e+9 - ) + ) / 24.0 ) diff --git a/pandas/tests/indexes/datetimes/methods/test_to_julian_date.py b/pandas/tests/indexes/datetimes/methods/test_to_julian_date.py index 90990e5556bf4..f6604b7854974 100644 --- a/pandas/tests/indexes/datetimes/methods/test_to_julian_date.py +++ b/pandas/tests/indexes/datetimes/methods/test_to_julian_date.py @@ -1,6 +1,8 @@ -import numpy as np from zoneinfo import ZoneInfo +import numpy as np +import pytest + from pandas import ( Index, Timestamp, @@ -8,7 +10,6 @@ to_datetime, ) import pandas._testing as tm -import pytest class TestDateTimeIndexToJulianDate: @@ -47,36 +48,42 @@ def test_second(self): assert isinstance(r2, Index) and r2.dtype == np.float64 tm.assert_index_equal(r1, r2) - @pytest.mark.parametrize("tz, expected", [ - (None, 2400000.5), - (ZoneInfo("UTC"), 2400000.5), - (ZoneInfo("US/Pacific"), 2400000.5 + (8 / 24)), - (ZoneInfo("Europe/London"), 2400000.5 - (1 / 24)) - ]) + @pytest.mark.parametrize( + "tz, expected", + [ + (None, 2400000.5), + (ZoneInfo("UTC"), 2400000.5), + (ZoneInfo("US/Pacific"), 2400000.5 + (8 / 24)), + (ZoneInfo("Europe/London"), 2400000.5 - (1 / 24)), + ], + ) def test_to_julian_date_with_timezones_single_element(self, tz, expected): # GH54763: DatetimeIndex.to_julian_date() must consider timezone - dates = to_datetime(['1858-11-17T00:00:00.0']) + dates = to_datetime(["1858-11-17T00:00:00.0"]) if tz: dates = dates.tz_localize(tz) result = Index(dates.to_julian_date()) 
expected = Index([expected]) tm.assert_almost_equal(result, expected, rtol=1e-6, atol=1e-6) - @pytest.mark.parametrize("tz, offset", [ - (None, 0), - (ZoneInfo("UTC"), 0), - (ZoneInfo("US/Pacific"), 8), - (ZoneInfo("Europe/London"), -1) - ]) + @pytest.mark.parametrize( + "tz, offset", + [ + (None, 0), + (ZoneInfo("UTC"), 0), + (ZoneInfo("US/Pacific"), 8), + (ZoneInfo("Europe/London"), -1), + ], + ) def test_to_julian_date_with_timezones_multiple_elements(self, tz, offset): # GH54763: DatetimeIndex.to_julian_date() must consider timezone dates = to_datetime( [ - '1858-11-17T00:00:00', - '1858-11-17T12:00:00', - '2000-01-01T00:00:00', - '2000-01-01T12:00:00', - '2000-01-01T12:00:00' + "1858-11-17T00:00:00", + "1858-11-17T12:00:00", + "2000-01-01T00:00:00", + "2000-01-01T12:00:00", + "2000-01-01T12:00:00", ] ) if tz: @@ -88,7 +95,7 @@ def test_to_julian_date_with_timezones_multiple_elements(self, tz, offset): 2400001.0 + (offset / 24), 2451544.5 + (offset / 24), 2451545.0 + (offset / 24), - 2451545.0 + (offset / 24) + 2451545.0 + (offset / 24), ] ) tm.assert_almost_equal(result, expected, rtol=1e-6, atol=1e-6) diff --git a/pandas/tests/scalar/timestamp/methods/test_to_julian_date.py b/pandas/tests/scalar/timestamp/methods/test_to_julian_date.py index 1e11c3b3278cb..9e860dfb39918 100644 --- a/pandas/tests/scalar/timestamp/methods/test_to_julian_date.py +++ b/pandas/tests/scalar/timestamp/methods/test_to_julian_date.py @@ -1,10 +1,10 @@ -from pandas import Timestamp - import zoneinfo -import pandas._testing as tm import pytest +from pandas import Timestamp +import pandas._testing as tm + class TestTimestampToJulianDate: def test_compare_1700(self): @@ -32,15 +32,18 @@ def test_compare_hour13(self): res = ts.to_julian_date() assert res == 2_451_769.0416666666666666 - @pytest.mark.parametrize("tz, expected", [ - (None, 2400000.5), - (zoneinfo.ZoneInfo("UTC"), 2400000.5), - (zoneinfo.ZoneInfo("US/Pacific"), 2400000.5 + (8 / 24)), - (zoneinfo.ZoneInfo("Europe/London"), 
2400000.5 - (1 / 24)) - ]) + @pytest.mark.parametrize( + "tz, expected", + [ + (None, 2400000.5), + (zoneinfo.ZoneInfo("UTC"), 2400000.5), + (zoneinfo.ZoneInfo("US/Pacific"), 2400000.5 + (8 / 24)), + (zoneinfo.ZoneInfo("Europe/London"), 2400000.5 - (1 / 24)), + ], + ) def test_to_julian_date_with_timezones(self, tz, expected): # GH54763: Timestamp.to_julian_date() must consider timezone - ts = Timestamp('1858-11-17T00:00:00.0') + ts = Timestamp("1858-11-17T00:00:00.0") if tz: ts = ts.tz_localize(tz) result = ts.to_julian_date()