diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index bafde9f3b237b..f0f73d242cdf0 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -460,7 +460,6 @@ cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) noexcept nogil: return NPY_DATETIMEUNIT.NPY_FR_D -# TODO: use in _matplotlib.converter? cpdef int64_t periods_per_day( NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns ) except? -1: diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 33aeaa6d81406..3f77a32014bff 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -11,7 +11,6 @@ from typing import ( TYPE_CHECKING, Any, - Final, cast, ) @@ -30,8 +29,14 @@ Timestamp, to_offset, ) -from pandas._libs.tslibs.dtypes import FreqGroup -from pandas._typing import F +from pandas._libs.tslibs.dtypes import ( + FreqGroup, + periods_per_day, +) +from pandas._typing import ( + F, + npt, +) from pandas.core.dtypes.common import ( is_float, @@ -60,15 +65,6 @@ from pandas._libs.tslibs.offsets import BaseOffset -# constants -HOURS_PER_DAY: Final = 24.0 -MIN_PER_HOUR: Final = 60.0 -SEC_PER_MIN: Final = 60.0 - -SEC_PER_HOUR: Final = SEC_PER_MIN * MIN_PER_HOUR -SEC_PER_DAY: Final = SEC_PER_HOUR * HOURS_PER_DAY - -MUSEC_PER_DAY: Final = 10**6 * SEC_PER_DAY _mpl_units = {} # Cache for units overwritten by us @@ -495,7 +491,7 @@ def _get_default_annual_spacing(nyears) -> tuple[int, int]: return (min_spacing, maj_spacing) -def period_break(dates: PeriodIndex, period: str) -> np.ndarray: +def _period_break(dates: PeriodIndex, period: str) -> npt.NDArray[np.intp]: """ Returns the indices where the given period changes. @@ -506,12 +502,17 @@ def period_break(dates: PeriodIndex, period: str) -> np.ndarray: period : str Name of the period to monitor. """ + mask = _period_break_mask(dates, period) + return np.nonzero(mask)[0] + + +def _period_break_mask(dates: PeriodIndex, period: str) -> npt.NDArray[np.bool_]: current = getattr(dates, period) previous = getattr(dates - 1 * dates.freq, period) - return np.nonzero(current - previous)[0] + return current != previous -def has_level_label(label_flags: np.ndarray, vmin: float) -> bool: +def has_level_label(label_flags: npt.NDArray[np.intp], vmin: float) -> bool: """ Returns true if the ``label_flags`` indicate there is at least one label for this level. @@ -527,54 +528,59 @@ def has_level_label(label_flags: np.ndarray, vmin: float) -> bool: return True -def _daily_finder(vmin, vmax, freq: BaseOffset): +def _get_periods_per_ymd(freq: BaseOffset) -> tuple[int, int, int]: # error: "BaseOffset" has no attribute "_period_dtype_code" dtype_code = freq._period_dtype_code # type: ignore[attr-defined] freq_group = FreqGroup.from_period_dtype_code(dtype_code) - periodsperday = -1 + ppd = -1 # placeholder for above-day freqs if dtype_code >= FreqGroup.FR_HR.value: - if freq_group == FreqGroup.FR_NS: - periodsperday = 24 * 60 * 60 * 1000000000 - elif freq_group == FreqGroup.FR_US: - periodsperday = 24 * 60 * 60 * 1000000 - elif freq_group == FreqGroup.FR_MS: - periodsperday = 24 * 60 * 60 * 1000 - elif freq_group == FreqGroup.FR_SEC: - periodsperday = 24 * 60 * 60 - elif freq_group == FreqGroup.FR_MIN: - periodsperday = 24 * 60 - elif freq_group == FreqGroup.FR_HR: - periodsperday = 24 - else: # pragma: no cover - raise ValueError(f"unexpected frequency: {dtype_code}") - periodsperyear = 365 * periodsperday - periodspermonth = 28 * periodsperday - + # error: "BaseOffset" has no attribute "_creso" + ppd = periods_per_day(freq._creso) # type: ignore[attr-defined] + ppm = 28 * ppd + ppy = 365 * ppd elif freq_group == FreqGroup.FR_BUS: - periodsperyear = 261 - periodspermonth = 19 + ppm = 19 + ppy = 261 elif freq_group == FreqGroup.FR_DAY: - periodsperyear = 365 - periodspermonth = 28 + ppm = 28 + ppy = 365 elif freq_group == FreqGroup.FR_WK: - periodsperyear = 52 - periodspermonth = 3 - else: # pragma: no cover - raise ValueError("unexpected frequency") + ppm = 3 + ppy = 52 + elif freq_group == FreqGroup.FR_MTH: + ppm = 1 + ppy = 12 + elif freq_group == FreqGroup.FR_QTR: + ppm = -1 # placerholder + ppy = 4 + elif freq_group == FreqGroup.FR_ANN: + ppm = -1 # placeholder + ppy = 1 + else: + raise NotImplementedError(f"Unsupported frequency: {dtype_code}") + + return ppd, ppm, ppy + + +def _daily_finder(vmin, vmax, freq: BaseOffset) -> np.ndarray: + # error: "BaseOffset" has no attribute "_period_dtype_code" + dtype_code = freq._period_dtype_code # type: ignore[attr-defined] + + periodsperday, periodspermonth, periodsperyear = _get_periods_per_ymd(freq) # save this for later usage vmin_orig = vmin + (vmin, vmax) = (int(vmin), int(vmax)) + span = vmax - vmin + 1 - (vmin, vmax) = ( - Period(ordinal=int(vmin), freq=freq), - Period(ordinal=int(vmax), freq=freq), + dates_ = period_range( + start=Period(ordinal=vmin, freq=freq), + end=Period(ordinal=vmax, freq=freq), + freq=freq, ) - assert isinstance(vmin, Period) - assert isinstance(vmax, Period) - span = vmax.ordinal - vmin.ordinal + 1 - dates_ = period_range(start=vmin, end=vmax, freq=freq) + # Initialize the output info = np.zeros( span, dtype=[("val", np.int64), ("maj", bool), ("min", bool), ("fmt", "|S20")] @@ -595,45 +601,38 @@ def first_label(label_flags): # Case 1. Less than a month if span <= periodspermonth: - day_start = period_break(dates_, "day") - month_start = period_break(dates_, "month") + day_start = _period_break(dates_, "day") + month_start = _period_break(dates_, "month") + year_start = _period_break(dates_, "year") - def _hour_finder(label_interval, force_year_start) -> None: - _hour = dates_.hour - _prev_hour = (dates_ - 1 * dates_.freq).hour - hour_start = (_hour - _prev_hour) != 0 + def _hour_finder(label_interval: int, force_year_start: bool) -> None: + target = dates_.hour + mask = _period_break_mask(dates_, "hour") info_maj[day_start] = True - info_min[hour_start & (_hour % label_interval == 0)] = True - year_start = period_break(dates_, "year") - info_fmt[hour_start & (_hour % label_interval == 0)] = "%H:%M" + info_min[mask & (target % label_interval == 0)] = True + info_fmt[mask & (target % label_interval == 0)] = "%H:%M" info_fmt[day_start] = "%H:%M\n%d-%b" info_fmt[year_start] = "%H:%M\n%d-%b\n%Y" if force_year_start and not has_level_label(year_start, vmin_orig): info_fmt[first_label(day_start)] = "%H:%M\n%d-%b\n%Y" - def _minute_finder(label_interval) -> None: - hour_start = period_break(dates_, "hour") - _minute = dates_.minute - _prev_minute = (dates_ - 1 * dates_.freq).minute - minute_start = (_minute - _prev_minute) != 0 + def _minute_finder(label_interval: int) -> None: + target = dates_.minute + hour_start = _period_break(dates_, "hour") + mask = _period_break_mask(dates_, "minute") info_maj[hour_start] = True - info_min[minute_start & (_minute % label_interval == 0)] = True - year_start = period_break(dates_, "year") - info_fmt = info["fmt"] - info_fmt[minute_start & (_minute % label_interval == 0)] = "%H:%M" + info_min[mask & (target % label_interval == 0)] = True + info_fmt[mask & (target % label_interval == 0)] = "%H:%M" info_fmt[day_start] = "%H:%M\n%d-%b" info_fmt[year_start] = "%H:%M\n%d-%b\n%Y" - def _second_finder(label_interval) -> None: - minute_start = period_break(dates_, "minute") - _second = dates_.second - _prev_second = (dates_ - 1 * dates_.freq).second - second_start = (_second - _prev_second) != 0 - info["maj"][minute_start] = True - info["min"][second_start & (_second % label_interval == 0)] = True - year_start = period_break(dates_, "year") - info_fmt = info["fmt"] - info_fmt[second_start & (_second % label_interval == 0)] = "%H:%M:%S" + def _second_finder(label_interval: int) -> None: + target = dates_.second + minute_start = _period_break(dates_, "minute") + mask = _period_break_mask(dates_, "second") + info_maj[minute_start] = True + info_min[mask & (target % label_interval == 0)] = True + info_fmt[mask & (target % label_interval == 0)] = "%H:%M:%S" info_fmt[day_start] = "%H:%M:%S\n%d-%b" info_fmt[year_start] = "%H:%M:%S\n%d-%b\n%Y" @@ -672,8 +671,6 @@ def _second_finder(label_interval) -> None: else: info_maj[month_start] = True info_min[day_start] = True - year_start = period_break(dates_, "year") - info_fmt = info["fmt"] info_fmt[day_start] = "%d" info_fmt[month_start] = "%d\n%b" info_fmt[year_start] = "%d\n%b\n%Y" @@ -685,15 +682,15 @@ def _second_finder(label_interval) -> None: # Case 2. Less than three months elif span <= periodsperyear // 4: - month_start = period_break(dates_, "month") + month_start = _period_break(dates_, "month") info_maj[month_start] = True if dtype_code < FreqGroup.FR_HR.value: info["min"] = True else: - day_start = period_break(dates_, "day") + day_start = _period_break(dates_, "day") info["min"][day_start] = True - week_start = period_break(dates_, "week") - year_start = period_break(dates_, "year") + week_start = _period_break(dates_, "week") + year_start = _period_break(dates_, "year") info_fmt[week_start] = "%d" info_fmt[month_start] = "\n\n%b" info_fmt[year_start] = "\n\n%b\n%Y" @@ -704,9 +701,9 @@ def _second_finder(label_interval) -> None: info_fmt[first_label(month_start)] = "\n\n%b\n%Y" # Case 3. Less than 14 months ............... elif span <= 1.15 * periodsperyear: - year_start = period_break(dates_, "year") - month_start = period_break(dates_, "month") - week_start = period_break(dates_, "week") + year_start = _period_break(dates_, "year") + month_start = _period_break(dates_, "month") + week_start = _period_break(dates_, "week") info_maj[month_start] = True info_min[week_start] = True info_min[year_start] = False @@ -717,17 +714,17 @@ def _second_finder(label_interval) -> None: info_fmt[first_label(month_start)] = "%b\n%Y" # Case 4. Less than 2.5 years ............... elif span <= 2.5 * periodsperyear: - year_start = period_break(dates_, "year") - quarter_start = period_break(dates_, "quarter") - month_start = period_break(dates_, "month") + year_start = _period_break(dates_, "year") + quarter_start = _period_break(dates_, "quarter") + month_start = _period_break(dates_, "month") info_maj[quarter_start] = True info_min[month_start] = True info_fmt[quarter_start] = "%b" info_fmt[year_start] = "%b\n%Y" # Case 4. Less than 4 years ................. elif span <= 4 * periodsperyear: - year_start = period_break(dates_, "year") - month_start = period_break(dates_, "month") + year_start = _period_break(dates_, "year") + month_start = _period_break(dates_, "month") info_maj[year_start] = True info_min[month_start] = True info_min[year_start] = False @@ -738,15 +735,15 @@ def _second_finder(label_interval) -> None: info_fmt[year_start] = "%b\n%Y" # Case 5. Less than 11 years ................ elif span <= 11 * periodsperyear: - year_start = period_break(dates_, "year") - quarter_start = period_break(dates_, "quarter") + year_start = _period_break(dates_, "year") + quarter_start = _period_break(dates_, "quarter") info_maj[year_start] = True info_min[quarter_start] = True info_min[year_start] = False info_fmt[year_start] = "%Y" # Case 6. More than 12 years ................ else: - year_start = period_break(dates_, "year") + year_start = _period_break(dates_, "year") year_break = dates_[year_start].year nyears = span / periodsperyear (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) @@ -759,8 +756,8 @@ def _second_finder(label_interval) -> None: return info -def _monthly_finder(vmin, vmax, freq): - periodsperyear = 12 +def _monthly_finder(vmin, vmax, freq: BaseOffset) -> np.ndarray: + _, _, periodsperyear = _get_periods_per_ymd(freq) vmin_orig = vmin (vmin, vmax) = (int(vmin), int(vmax)) @@ -795,6 +792,7 @@ def _monthly_finder(vmin, vmax, freq): quarter_start = (dates_ % 3 == 0).nonzero() info_maj[year_start] = True # TODO: Check the following : is it really info['fmt'] ? + # 2023-09-15 this is reached in test_finder_monthly info["fmt"][quarter_start] = True info["min"] = True @@ -829,8 +827,8 @@ def _monthly_finder(vmin, vmax, freq): return info -def _quarterly_finder(vmin, vmax, freq): - periodsperyear = 4 +def _quarterly_finder(vmin, vmax, freq: BaseOffset) -> np.ndarray: + _, _, periodsperyear = _get_periods_per_ymd(freq) vmin_orig = vmin (vmin, vmax) = (int(vmin), int(vmax)) span = vmax - vmin + 1 @@ -876,7 +874,8 @@ def _quarterly_finder(vmin, vmax, freq): return info -def _annual_finder(vmin, vmax, freq): +def _annual_finder(vmin, vmax, freq: BaseOffset) -> np.ndarray: + # Note: small difference here vs other finders in adding 1 to vmax (vmin, vmax) = (int(vmin), int(vmax + 1)) span = vmax - vmin + 1 @@ -889,8 +888,9 @@ def _annual_finder(vmin, vmax, freq): (min_anndef, maj_anndef) = _get_default_annual_spacing(span) major_idx = dates_ % maj_anndef == 0 + minor_idx = dates_ % min_anndef == 0 info["maj"][major_idx] = True - info["min"][(dates_ % min_anndef == 0)] = True + info["min"][minor_idx] = True info["fmt"][major_idx] = "%Y" return info @@ -1087,7 +1087,7 @@ def format_timedelta_ticks(x, pos, n_decimals: int) -> str: """ Convert seconds to 'D days HH:MM:SS.F' """ - s, ns = divmod(x, 10**9) + s, ns = divmod(x, 10**9) # TODO(non-nano): this looks like it assumes ns m, s = divmod(s, 60) h, m = divmod(m, 60) d, h = divmod(h, 24) diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index 0108079f1110f..7d574b86cef36 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -10,6 +10,8 @@ import pandas._config.config as cf +from pandas._libs.tslibs import to_offset + from pandas import ( Index, Period, @@ -390,7 +392,7 @@ def test_quarterly_finder(year_span): pytest.skip("the quarterly finder is only invoked if the span is >= 45") nyears = span / 4 (min_anndef, maj_anndef) = converter._get_default_annual_spacing(nyears) - result = converter._quarterly_finder(vmin, vmax, "Q") + result = converter._quarterly_finder(vmin, vmax, to_offset("Q")) quarters = PeriodIndex( arrays.PeriodArray(np.array([x[0] for x in result]), dtype="period[Q]") )