diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 89bc942cb7250..d319d6c9cc49d 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -635,7 +635,8 @@ Timedelta
 
 Timezones
 ^^^^^^^^^
--
+- Bug in :meth:`DatetimeIndex.to_julian_date` not accounting for timezone information, leading to incorrect Julian dates for timezone-aware data (:issue:`54763`)
+- Bug in :meth:`Timestamp.to_julian_date` not accounting for timezone information, leading to incorrect Julian dates for timezone-aware timestamps (:issue:`54763`)
 -
 
 Numeric
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
index 1ab34da7ab53f..658f239345917 100644
--- a/pandas/_libs/tslibs/timestamps.pyx
+++ b/pandas/_libs/tslibs/timestamps.pyx
@@ -3319,19 +3319,28 @@ default 'raise'
         if month <= 2:
             year -= 1
             month += 12
-        return (day +
-                np.fix((153 * month - 457) / 5) +
-                365 * year +
-                np.floor(year / 4) -
-                np.floor(year / 100) +
-                np.floor(year / 400) +
-                1721118.5 +
-                (self.hour +
-                 self.minute / 60.0 +
-                 self.second / 3600.0 +
-                 self.microsecond / 3600.0 / 1e+6 +
-                 self.nanosecond / 3600.0 / 1e+9
-                 ) / 24.0)
+        # Julian dates are UT-based, so the UTC offset (in days) is subtracted
+        timezone_offset = 0.0
+        if self.tzinfo is not None:
+            timezone_offset = self.utcoffset().total_seconds() / 86400.0
+        return (
+            day
+            + np.fix((153 * month - 457) / 5)
+            + 365 * year
+            + np.floor(year / 4)
+            - np.floor(year / 100)
+            + np.floor(year / 400)
+            + 1721118.5
+            - timezone_offset
+            + (
+                self.hour
+                + self.minute / 60.0
+                + self.second / 3600.0
+                + self.microsecond / 3600.0 / 1e+6
+                + self.nanosecond / 3600.0 / 1e+9
+            )
+            / 24.0
+        )
 
     def isoweekday(self):
         """
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 43cc492f82885..abb0b0765565a 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -2266,6 +2266,12 @@ def to_julian_date(self) -> npt.NDArray[np.float64]:
         testarr = month < 3
         year[testarr] -= 1
         month[testarr] += 12
+        timezone_offsets = np.zeros(len(self), dtype=np.float64)
+        if self.tz is not None:
+            # Wall time minus UTC time gives every element's UTC offset at once,
+            # including offsets that differ across DST transitions.
+            utc_offsets = self.tz_localize(None) - self.tz_convert(None)
+            timezone_offsets = utc_offsets.total_seconds() / 86400.0
         return (
             day
             + np.fix((153 * month - 457) / 5)
@@ -2274,6 +2280,7 @@ def to_julian_date(self) -> npt.NDArray[np.float64]:
             - np.floor(year / 100)
             + np.floor(year / 400)
             + 1_721_118.5
+            - timezone_offsets
             + (
                 self.hour
                 + self.minute / 60
diff --git a/pandas/tests/indexes/datetimes/methods/test_to_julian_date.py b/pandas/tests/indexes/datetimes/methods/test_to_julian_date.py
index fc1f0595c21c5..f6604b7854974 100644
--- a/pandas/tests/indexes/datetimes/methods/test_to_julian_date.py
+++ b/pandas/tests/indexes/datetimes/methods/test_to_julian_date.py
@@ -1,9 +1,13 @@
+from zoneinfo import ZoneInfo
+
 import numpy as np
+import pytest
 
 from pandas import (
     Index,
     Timestamp,
     date_range,
+    to_datetime,
 )
 import pandas._testing as tm
 
@@ -43,3 +47,62 @@ def test_second(self):
         r2 = dr.to_julian_date()
         assert isinstance(r2, Index) and r2.dtype == np.float64
         tm.assert_index_equal(r1, r2)
+
+    @pytest.mark.parametrize(
+        "tz, expected",
+        [
+            (None, 2451545.0),
+            (ZoneInfo("UTC"), 2451545.0),
+            (ZoneInfo("US/Pacific"), 2451545.0 + (8 / 24)),
+            (ZoneInfo("Europe/London"), 2451545.0),
+            (ZoneInfo("Asia/Tokyo"), 2451545.0 - (9 / 24)),
+        ],
+    )
+    def test_to_julian_date_with_timezones_single_element(self, tz, expected):
+        # GH54763: DatetimeIndex.to_julian_date() must consider timezone
+        # Use a modern date: before standard time was adopted, zones fall
+        # back to local-mean-time offsets that are not whole hours.
+        dates = to_datetime(["2000-01-01T12:00:00"])
+        if tz is not None:
+            dates = dates.tz_localize(tz)
+        result = Index(dates.to_julian_date())
+        expected = Index([expected])
+        # rtol must be 0: relative to ~2.4e6 it would swamp hour-sized offsets
+        tm.assert_almost_equal(result, expected, rtol=0, atol=1e-6)
+
+    @pytest.mark.parametrize(
+        "tz, winter_hours, summer_hours",
+        [
+            (None, 0, 0),
+            (ZoneInfo("UTC"), 0, 0),
+            (ZoneInfo("US/Pacific"), 8, 7),
+            (ZoneInfo("Europe/London"), 0, -1),
+            (ZoneInfo("Asia/Tokyo"), -9, -9),
+        ],
+    )
+    def test_to_julian_date_with_timezones_multiple_elements(
+        self, tz, winter_hours, summer_hours
+    ):
+        # GH54763: DatetimeIndex.to_julian_date() must consider timezone
+        # The UTC offset can differ per element across DST transitions.
+        dates = to_datetime(
+            [
+                "2000-01-01T00:00:00",
+                "2000-01-01T12:00:00",
+                "2000-07-01T00:00:00",
+                "2000-07-01T12:00:00",
+            ]
+        )
+        if tz is not None:
+            dates = dates.tz_localize(tz)
+        result = Index(dates.to_julian_date())
+        expected = Index(
+            [
+                2451544.5 + (winter_hours / 24),
+                2451545.0 + (winter_hours / 24),
+                2451726.5 + (summer_hours / 24),
+                2451727.0 + (summer_hours / 24),
+            ]
+        )
+        # rtol must be 0: relative to ~2.4e6 it would swamp hour-sized offsets
+        tm.assert_almost_equal(result, expected, rtol=0, atol=1e-6)
diff --git a/pandas/tests/scalar/timestamp/methods/test_to_julian_date.py b/pandas/tests/scalar/timestamp/methods/test_to_julian_date.py
index 7769614b601a4..9e860dfb39918 100644
--- a/pandas/tests/scalar/timestamp/methods/test_to_julian_date.py
+++ b/pandas/tests/scalar/timestamp/methods/test_to_julian_date.py
@@ -1,4 +1,9 @@
+import zoneinfo
+
+import pytest
+
 from pandas import Timestamp
+import pandas._testing as tm
 
 
 class TestTimestampToJulianDate:
@@ -26,3 +31,24 @@ def test_compare_hour13(self):
         ts = Timestamp("2000-08-12T13:00:00")
         res = ts.to_julian_date()
         assert res == 2_451_769.0416666666666666
+
+    @pytest.mark.parametrize(
+        "tz, expected",
+        [
+            (None, 2451545.0),
+            (zoneinfo.ZoneInfo("UTC"), 2451545.0),
+            (zoneinfo.ZoneInfo("US/Pacific"), 2451545.0 + (8 / 24)),
+            (zoneinfo.ZoneInfo("Europe/London"), 2451545.0),
+            (zoneinfo.ZoneInfo("Asia/Tokyo"), 2451545.0 - (9 / 24)),
+        ],
+    )
+    def test_to_julian_date_with_timezones(self, tz, expected):
+        # GH54763: Timestamp.to_julian_date() must consider timezone
+        # Use a modern date: before standard time was adopted, zones fall
+        # back to local-mean-time offsets that are not whole hours.
+        ts = Timestamp("2000-01-01T12:00:00")
+        if tz is not None:
+            ts = ts.tz_localize(tz)
+        result = ts.to_julian_date()
+        # rtol must be 0: relative to ~2.4e6 it would swamp hour-sized offsets
+        tm.assert_almost_equal(result, expected, rtol=0, atol=1e-6)