diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 35f9f623bf8ef..4d9422143fe6a 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -305,6 +305,8 @@ Datetimelike - Bug in :func:`date_range` when ``freq`` was a :class:`DateOffset` with ``nanoseconds`` (:issue:`46877`) - Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`) - Bug in :meth:`arrays.DatetimeArray.map` and :meth:`DatetimeIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`) +- Bug in parsing datetime strings with weekday but no day e.g. "2023 Sept Thu" incorrectly raising ``AttributeError`` instead of ``ValueError`` (:issue:`52659`) +- Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 31acf0ef1bbe4..ffbaddfcd2afa 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -15,8 +15,6 @@ from cpython.datetime cimport ( import_datetime() -from dateutil.easter import easter -from dateutil.relativedelta import relativedelta import numpy as np cimport numpy as cnp @@ -348,6 +346,8 @@ cdef _determine_offset(kwds): kwds_no_nanos["microseconds"] = kwds_no_nanos.get("microseconds", 0) + micro if all(k in kwds_use_relativedelta for k in kwds_no_nanos): + from dateutil.relativedelta import relativedelta + return relativedelta(**kwds_no_nanos), True raise ValueError( @@ -3691,6 +3691,8 @@ cdef class Easter(SingleConstructorOffset): @apply_wraps def _apply(self, other: datetime) -> datetime: + from dateutil.easter import easter + current_easter = easter(other.year) current_easter = datetime( current_easter.year, current_easter.month, current_easter.day @@ -3721,6 +3723,9 @@ cdef class Easter(SingleConstructorOffset): def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): return False + + from dateutil.easter import easter + return date(dt.year, dt.month, dt.day) == easter(dt.year) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index cd92e1b8deb34..880ac70b39265 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -46,7 +46,6 @@ from dateutil.parser import ( DEFAULTPARSER, parse as du_parse, ) -from dateutil.relativedelta import relativedelta from dateutil.tz import ( tzlocal as _dateutil_tzlocal, tzoffset, @@ -692,7 +691,11 @@ cdef datetime dateutil_parse( ) from err if res.weekday is not None and not res.day: - ret = ret + relativedelta.relativedelta(weekday=res.weekday) + # GH#52659 + raise ValueError( + "Parsing datetimes with weekday but no day information is " + "not supported" + ) if not ignoretz: if res.tzname and res.tzname in time.tzname: # GH#50791 diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 0492ba22dcf8a..10859c0fa58c4 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -12,7 +12,10 @@ from __future__ import annotations from collections import abc -import datetime +from datetime import ( + datetime, + timedelta, +) from io import BytesIO import os import struct @@ -30,7 +33,6 @@ ) import warnings -from dateutil.relativedelta import relativedelta import numpy as np from pandas._libs import lib @@ -226,7 +228,7 @@ _date_formats = ["%tc", "%tC", "%td", "%d", "%tw", "%tm", "%tq", "%th", "%ty"] -stata_epoch: Final = datetime.datetime(1960, 1, 1) +stata_epoch: Final = datetime(1960, 1, 1) # TODO: Add typing. As of January 2020 it is not possible to type this function since @@ -279,8 +281,8 @@ def _stata_elapsed_date_to_datetime_vec(dates, fmt) -> Series: years since 0000 """ MIN_YEAR, MAX_YEAR = Timestamp.min.year, Timestamp.max.year - MAX_DAY_DELTA = (Timestamp.max - datetime.datetime(1960, 1, 1)).days - MIN_DAY_DELTA = (Timestamp.min - datetime.datetime(1960, 1, 1)).days + MAX_DAY_DELTA = (Timestamp.max - datetime(1960, 1, 1)).days + MIN_DAY_DELTA = (Timestamp.min - datetime(1960, 1, 1)).days MIN_MS_DELTA = MIN_DAY_DELTA * 24 * 3600 * 1000 MAX_MS_DELTA = MAX_DAY_DELTA * 24 * 3600 * 1000 @@ -295,9 +297,7 @@ def convert_year_month_safe(year, month) -> Series: return to_datetime(100 * year + month, format="%Y%m") else: index = getattr(year, "index", None) - return Series( - [datetime.datetime(y, m, 1) for y, m in zip(year, month)], index=index - ) + return Series([datetime(y, m, 1) for y, m in zip(year, month)], index=index) def convert_year_days_safe(year, days) -> Series: """ @@ -309,8 +309,7 @@ def convert_year_days_safe(year, days) -> Series: else: index = getattr(year, "index", None) value = [ - datetime.datetime(y, 1, 1) + relativedelta(days=int(d)) - for y, d in zip(year, days) + datetime(y, 1, 1) + timedelta(days=int(d)) for y, d in zip(year, days) ] return Series(value, index=index) @@ -323,12 +322,12 @@ def convert_delta_safe(base, deltas, unit) -> Series: index = getattr(deltas, "index", None) if unit == "d": if deltas.max() > MAX_DAY_DELTA or deltas.min() < MIN_DAY_DELTA: - values = [base + relativedelta(days=int(d)) for d in deltas] + values = [base + timedelta(days=int(d)) for d in deltas] return Series(values, index=index) elif unit == "ms": if deltas.max() > MAX_MS_DELTA or deltas.min() < MIN_MS_DELTA: values = [ - base + relativedelta(microseconds=(int(d) * 1000)) for d in deltas + base + timedelta(microseconds=(int(d) * 1000)) for d in deltas ] return Series(values, index=index) else: @@ -405,7 +404,7 @@ def _datetime_to_stata_elapsed_vec(dates: Series, fmt: str) -> Series: Parameters ---------- dates : Series - Series or array containing datetime.datetime or datetime64[ns] to + Series or array containing datetime or datetime64[ns] to convert to the Stata Internal Format given by fmt fmt : str The format to convert to. Can be, tc, td, tw, tm, tq, th, ty @@ -436,7 +435,7 @@ def parse_dates_safe( if delta: delta = dates._values - stata_epoch - def f(x: datetime.timedelta) -> float: + def f(x: timedelta) -> float: return US_PER_DAY * x.days + 1000000 * x.seconds + x.microseconds v = np.vectorize(f) @@ -447,15 +446,15 @@ def f(x: datetime.timedelta) -> float: d["month"] = year_month._values - d["year"] * 100 if days: - def g(x: datetime.datetime) -> int: - return (x - datetime.datetime(x.year, 1, 1)).days + def g(x: datetime) -> int: + return (x - datetime(x.year, 1, 1)).days v = np.vectorize(g) d["days"] = v(dates) else: raise ValueError( "Columns containing dates must contain either " - "datetime64, datetime.datetime or null values." + "datetime64, datetime or null values." ) return DataFrame(d, index=index) @@ -2291,7 +2290,7 @@ class StataWriter(StataParser): * If datetimes contain timezone information ValueError * Columns listed in convert_dates are neither datetime64[ns] - or datetime.datetime + or datetime * Column dtype is not representable in Stata * Column listed in convert_dates is not in DataFrame * Categorical label contains more than 32,000 characters @@ -2324,7 +2323,7 @@ def __init__( convert_dates: dict[Hashable, str] | None = None, write_index: bool = True, byteorder: str | None = None, - time_stamp: datetime.datetime | None = None, + time_stamp: datetime | None = None, data_label: str | None = None, variable_labels: dict[Hashable, str] | None = None, compression: CompressionOptions = "infer", @@ -2764,7 +2763,7 @@ def _write_value_labels(self) -> None: def _write_header( self, data_label: str | None = None, - time_stamp: datetime.datetime | None = None, + time_stamp: datetime | None = None, ) -> None: byteorder = self._byteorder # ds_format - just use 114 @@ -2789,8 +2788,8 @@ def _write_header( # time stamp, 18 bytes, char, null terminated # format dd Mon yyyy hh:mm if time_stamp is None: - time_stamp = datetime.datetime.now() - elif not isinstance(time_stamp, datetime.datetime): + time_stamp = datetime.now() + elif not isinstance(time_stamp, datetime): raise ValueError("time_stamp should be datetime type") # GH #13856 # Avoid locale-specific month conversion @@ -3214,7 +3213,7 @@ class StataWriter117(StataWriter): * If datetimes contain timezone information ValueError * Columns listed in convert_dates are neither datetime64[ns] - or datetime.datetime + or datetime * Column dtype is not representable in Stata * Column listed in convert_dates is not in DataFrame * Categorical label contains more than 32,000 characters @@ -3250,7 +3249,7 @@ def __init__( convert_dates: dict[Hashable, str] | None = None, write_index: bool = True, byteorder: str | None = None, - time_stamp: datetime.datetime | None = None, + time_stamp: datetime | None = None, data_label: str | None = None, variable_labels: dict[Hashable, str] | None = None, convert_strl: Sequence[Hashable] | None = None, @@ -3295,7 +3294,7 @@ def _update_map(self, tag: str) -> None: def _write_header( self, data_label: str | None = None, - time_stamp: datetime.datetime | None = None, + time_stamp: datetime | None = None, ) -> None: """Write the file header""" byteorder = self._byteorder @@ -3321,8 +3320,8 @@ def _write_header( # time stamp, 18 bytes, char, null terminated # format dd Mon yyyy hh:mm if time_stamp is None: - time_stamp = datetime.datetime.now() - elif not isinstance(time_stamp, datetime.datetime): + time_stamp = datetime.now() + elif not isinstance(time_stamp, datetime): raise ValueError("time_stamp should be datetime type") # Avoid locale-specific month conversion months = [ @@ -3604,7 +3603,7 @@ class StataWriterUTF8(StataWriter117): * If datetimes contain timezone information ValueError * Columns listed in convert_dates are neither datetime64[ns] - or datetime.datetime + or datetime * Column dtype is not representable in Stata * Column listed in convert_dates is not in DataFrame * Categorical label contains more than 32,000 characters @@ -3641,7 +3640,7 @@ def __init__( convert_dates: dict[Hashable, str] | None = None, write_index: bool = True, byteorder: str | None = None, - time_stamp: datetime.datetime | None = None, + time_stamp: datetime | None = None, data_label: str | None = None, variable_labels: dict[Hashable, str] | None = None, convert_strl: Sequence[Hashable] | None = None, diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 9b0fe99e2d61e..9a4806ae51920 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -16,7 +16,6 @@ cast, ) -from dateutil.relativedelta import relativedelta import matplotlib.dates as mdates from matplotlib.ticker import ( AutoLocator, @@ -349,11 +348,7 @@ def __init__(self, locator, tz=None, defaultfmt: str = "%Y-%m-%d") -> None: class PandasAutoDateLocator(mdates.AutoDateLocator): def get_locator(self, dmin, dmax): """Pick the best locator based on a distance.""" - delta = relativedelta(dmax, dmin) - - num_days = (delta.years * 12.0 + delta.months) * 31.0 + delta.days - num_sec = (delta.hours * 60.0 + delta.minutes) * 60.0 + delta.seconds - tot_sec = num_days * 86400.0 + num_sec + tot_sec = (dmax - dmin).total_seconds() if abs(tot_sec) < self.minticks: self._freq = -1 diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index ceadf7a280a1b..5fca577ff28d1 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -25,6 +25,12 @@ class TestTimestampConstructors: + def test_weekday_but_no_day_raises(self): + # GH#52659 + msg = "Parsing datetimes with weekday but no day information is not supported" + with pytest.raises(ValueError, match=msg): + Timestamp("2023 Sept Thu") + def test_construct_from_string_invalid_raises(self): # dateutil (weirdly) parses "200622-12-31" as # datetime(2022, 6, 20, 12, 0, tzinfo=tzoffset(None, -111600)