Skip to content

REF: de-duplicate month_offset in tslibs.fields #35073

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/ccalendar.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ cpdef int32_t get_day_of_year(int year, int month, int day) nogil
cdef int64_t DAY_NANOS
cdef int64_t HOUR_NANOS
cdef dict c_MONTH_NUMBERS

cdef int32_t* month_offset
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/ccalendar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4]
# The first 13 entries give the month days elapsed as of the first of month N
# (or the total number of days in the year for N=13) in non-leap years.
# The remaining 13 entries give the days elapsed in leap years.
cdef int32_t* _month_offset = [
cdef int32_t* month_offset = [
Copy link
Member

@mroeschke mroeschke Jul 1, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

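Possibly an optimization for the future: we could drop the leap-year half of the table and have callers apply the leap-day adjustment themselves (the two rows only differ from index 2 onward, i.e. after February):

month_offset[month_index] + int(isleap and month_index > 1)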

0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365,
0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366]

Expand Down Expand Up @@ -242,7 +242,7 @@ cpdef int32_t get_day_of_year(int year, int month, int day) nogil:

isleap = is_leapyear(year)

mo_off = _month_offset[isleap * 13 + month - 1]
mo_off = month_offset[isleap * 13 + month - 1]

day_of_year = mo_off + day
return day_of_year
Expand Down
37 changes: 15 additions & 22 deletions pandas/_libs/tslibs/fields.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ from pandas._libs.tslibs.ccalendar import (
from pandas._libs.tslibs.ccalendar cimport (
DAY_NANOS,
get_days_in_month, is_leapyear, dayofweek, get_week_of_year,
get_day_of_year, get_iso_calendar, iso_calendar_t)
get_day_of_year, get_iso_calendar, iso_calendar_t,
month_offset,
)
from pandas._libs.tslibs.np_datetime cimport (
npy_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct,
td64_to_tdstruct)
Expand Down Expand Up @@ -155,19 +157,10 @@ def get_start_end_field(const int64_t[:] dtindex, str field,
int end_month = 12
int start_month = 1
ndarray[int8_t] out
ndarray[int32_t, ndim=2] _month_offset
bint isleap
npy_datetimestruct dts
int mo_off, dom, doy, dow, ldom

_month_offset = np.array(
[
[0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365],
[0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366],
],
dtype=np.int32,
)

out = np.zeros(count, dtype='int8')

if freqstr:
Expand Down Expand Up @@ -226,10 +219,10 @@ def get_start_end_field(const int64_t[:] dtindex, str field,

dt64_to_dtstruct(dtindex[i], &dts)
isleap = is_leapyear(dts.year)
mo_off = _month_offset[isleap, dts.month - 1]
mo_off = month_offset[isleap * 13 + dts.month - 1]
dom = dts.day
doy = mo_off + dom
ldom = _month_offset[isleap, dts.month]
ldom = month_offset[isleap * 13 + dts.month]
dow = dayofweek(dts.year, dts.month, dts.day)

if (ldom == doy and dow < 5) or (
Expand All @@ -244,10 +237,10 @@ def get_start_end_field(const int64_t[:] dtindex, str field,

dt64_to_dtstruct(dtindex[i], &dts)
isleap = is_leapyear(dts.year)
mo_off = _month_offset[isleap, dts.month - 1]
mo_off = month_offset[isleap * 13 + dts.month - 1]
dom = dts.day
doy = mo_off + dom
ldom = _month_offset[isleap, dts.month]
ldom = month_offset[isleap * 13 + dts.month]

if ldom == doy:
out[i] = 1
Expand Down Expand Up @@ -288,10 +281,10 @@ def get_start_end_field(const int64_t[:] dtindex, str field,

dt64_to_dtstruct(dtindex[i], &dts)
isleap = is_leapyear(dts.year)
mo_off = _month_offset[isleap, dts.month - 1]
mo_off = month_offset[isleap * 13 + dts.month - 1]
dom = dts.day
doy = mo_off + dom
ldom = _month_offset[isleap, dts.month]
ldom = month_offset[isleap * 13 + dts.month]
dow = dayofweek(dts.year, dts.month, dts.day)

if ((dts.month - end_month) % 3 == 0) and (
Expand All @@ -307,10 +300,10 @@ def get_start_end_field(const int64_t[:] dtindex, str field,

dt64_to_dtstruct(dtindex[i], &dts)
isleap = is_leapyear(dts.year)
mo_off = _month_offset[isleap, dts.month - 1]
mo_off = month_offset[isleap * 13 + dts.month - 1]
dom = dts.day
doy = mo_off + dom
ldom = _month_offset[isleap, dts.month]
ldom = month_offset[isleap * 13 + dts.month]

if ((dts.month - end_month) % 3 == 0) and (ldom == doy):
out[i] = 1
Expand Down Expand Up @@ -352,10 +345,10 @@ def get_start_end_field(const int64_t[:] dtindex, str field,
dt64_to_dtstruct(dtindex[i], &dts)
isleap = is_leapyear(dts.year)
dom = dts.day
mo_off = _month_offset[isleap, dts.month - 1]
mo_off = month_offset[isleap * 13 + dts.month - 1]
doy = mo_off + dom
dow = dayofweek(dts.year, dts.month, dts.day)
ldom = _month_offset[isleap, dts.month]
ldom = month_offset[isleap * 13 + dts.month]

if (dts.month == end_month) and (
(ldom == doy and dow < 5) or (
Expand All @@ -370,10 +363,10 @@ def get_start_end_field(const int64_t[:] dtindex, str field,

dt64_to_dtstruct(dtindex[i], &dts)
isleap = is_leapyear(dts.year)
mo_off = _month_offset[isleap, dts.month - 1]
mo_off = month_offset[isleap * 13 + dts.month - 1]
dom = dts.day
doy = mo_off + dom
ldom = _month_offset[isleap, dts.month]
ldom = month_offset[isleap * 13 + dts.month]

if (dts.month == end_month) and (ldom == doy):
out[i] = 1
Expand Down