Skip to content

ENH: fields.get_start_end_field support non-nano #46902

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/tslibs/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ class TimeGetStartEndField:

def setup(self, size, side, period, freqstr, month_kw):
arr = np.random.randint(0, 10, size=size, dtype="i8")
self.i8data = arr
self.dt64data = arr.view("M8[ns]")

self.attrname = f"is_{period}_{side}"

def time_get_start_end_field(self, size, side, period, freqstr, month_kw):
get_start_end_field(self.i8data, self.attrname, freqstr, month_kw=month_kw)
get_start_end_field(self.dt64data, self.attrname, freqstr, month_kw=month_kw)
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/fields.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def get_date_name_field(
locale: str | None = ...,
) -> npt.NDArray[np.object_]: ...
def get_start_end_field(
dtindex: npt.NDArray[np.int64], # const int64_t[:]
dt64values: npt.NDArray[np.datetime64],
field: str,
freqstr: str | None = ...,
month_kw: int = ...,
Expand Down
30 changes: 24 additions & 6 deletions pandas/_libs/tslibs/fields.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ objects and arrays
from locale import LC_TIME

from _strptime import LocaleTime

cimport cython
from cython cimport Py_ssize_t

import numpy as np

cimport numpy as cnp
Expand Down Expand Up @@ -41,8 +43,11 @@ from pandas._libs.tslibs.ccalendar cimport (
)
from pandas._libs.tslibs.nattype cimport NPY_NAT
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
dt64_to_dtstruct,
get_unit_from_dtype,
npy_datetimestruct,
pandas_datetime_to_datetimestruct,
pandas_timedeltastruct,
td64_to_tdstruct,
)
Expand Down Expand Up @@ -196,22 +201,35 @@ cdef inline bint _is_on_month(int month, int compare_month, int modby) nogil:

@cython.wraparound(False)
@cython.boundscheck(False)
def get_start_end_field(const int64_t[:] dtindex, str field,
def get_start_end_field(ndarray dt64values, str field,
str freqstr=None, int month_kw=12):
"""
Given a datetime64 ndarray (any resolution), return an array of indicators
of whether timestamps are at the start/end of the month/quarter/year
(defined by frequency).

Parameters
----------
dt64values : ndarray[datetime64], any resolution
field : str
freqstr : str or None, default None
month_kw : int, default 12

Returns
-------
ndarray[bool]
"""
cdef:
Py_ssize_t i
int count = len(dtindex)
int count = dt64values.size
bint is_business = 0
int end_month = 12
int start_month = 1
ndarray[int8_t] out
npy_datetimestruct dts
int compare_month, modby
ndarray dtindex = dt64values.view("i8")
NPY_DATETIMEUNIT reso = get_unit_from_dtype(dt64values.dtype)

out = np.zeros(count, dtype='int8')

Expand Down Expand Up @@ -251,7 +269,7 @@ def get_start_end_field(const int64_t[:] dtindex, str field,
out[i] = 0
continue

dt64_to_dtstruct(dtindex[i], &dts)
pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts)

if _is_on_month(dts.month, compare_month, modby) and (
dts.day == get_firstbday(dts.year, dts.month)):
Expand All @@ -263,7 +281,7 @@ def get_start_end_field(const int64_t[:] dtindex, str field,
out[i] = 0
continue

dt64_to_dtstruct(dtindex[i], &dts)
pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts)

if _is_on_month(dts.month, compare_month, modby) and dts.day == 1:
out[i] = 1
Expand All @@ -275,7 +293,7 @@ def get_start_end_field(const int64_t[:] dtindex, str field,
out[i] = 0
continue

dt64_to_dtstruct(dtindex[i], &dts)
pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts)

if _is_on_month(dts.month, compare_month, modby) and (
dts.day == get_lastbday(dts.year, dts.month)):
Expand All @@ -287,7 +305,7 @@ def get_start_end_field(const int64_t[:] dtindex, str field,
out[i] = 0
continue

dt64_to_dtstruct(dtindex[i], &dts)
pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts)

if _is_on_month(dts.month, compare_month, modby) and (
dts.day == get_days_in_month(dts.year, dts.month)):
Expand Down
8 changes: 4 additions & 4 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -487,9 +487,7 @@ cdef class _Timestamp(ABCTimestamp):
dict kwds
ndarray[uint8_t, cast=True] out
int month_kw

if self._reso != NPY_FR_ns:
raise NotImplementedError(self._reso)
str unit

if freq:
kwds = freq.kwds
Expand All @@ -500,7 +498,9 @@ cdef class _Timestamp(ABCTimestamp):
freqstr = None

val = self._maybe_convert_value_to_local()
out = get_start_end_field(np.array([val], dtype=np.int64),

unit = npy_unit_to_abbrev(self._reso)
out = get_start_end_field(np.array([val], dtype=f"M8[{unit}]"),
field, freqstr, month_kw)
return out[0]

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def f(self):
month_kw = kwds.get("startingMonth", kwds.get("month", 12))

result = fields.get_start_end_field(
values, field, self.freqstr, month_kw
values.view(self._ndarray.dtype), field, self.freqstr, month_kw
)
else:
result = fields.get_date_field(values, field)
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/scalar/timestamp/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,27 @@ def test_non_nano_fields(self, dt64, ts):
assert ts.weekday() == alt.weekday()
assert ts.isoweekday() == alt.isoweekday()

def test_start_end_fields(self, ts):
    """Check the is_*_start / is_*_end properties of the ``ts`` fixture.

    The six properties are asserted twice: once before a freq is attached
    and once after attaching a business-day freq.  Each group covers year,
    quarter and month for both "start" and "end".
    """
    # Before the BDay freq is attached below.
    assert ts.is_year_start
    assert ts.is_quarter_start
    assert ts.is_month_start
    assert not ts.is_year_end
    # Previously is_month_end was asserted twice and is_quarter_end never.
    assert not ts.is_quarter_end
    assert not ts.is_month_end

    freq = offsets.BDay()
    ts._set_freq(freq)

    # 2016-01-01 is a Friday, so is year/quarter/month start with this freq
    msg = "Timestamp.freq is deprecated"
    # NOTE(review): all six assertions are assumed to sit inside the
    # warning context -- confirm against the original indentation.
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert ts.is_year_start
        assert ts.is_quarter_start
        assert ts.is_month_start
        assert not ts.is_year_end
        assert not ts.is_quarter_end
        assert not ts.is_month_end

def test_repr(self, dt64, ts):
alt = Timestamp(dt64)

Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/tslibs/test_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@ def test_get_date_field_readonly(dtindex):


def test_get_start_end_field_readonly(dtindex):
result = fields.get_start_end_field(dtindex, "is_month_start", None)
dt64values = dtindex.view("M8[ns]")
dt64values.flags.writeable = False

result = fields.get_start_end_field(dt64values, "is_month_start", None)
expected = np.array([True, False, False, False, False], dtype=np.bool_)
tm.assert_numpy_array_equal(result, expected)

Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
"_libs.tslibs.fields": {
"pyxfile": "_libs/tslibs/fields",
"depends": tseries_depends,
"sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
},
"_libs.tslibs.nattype": {"pyxfile": "_libs/tslibs/nattype"},
"_libs.tslibs.np_datetime": {
Expand Down