-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
REF: de-duplicate get_conversion_factor #47770
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
if from_unit == to_unit: | ||
return 1 | ||
|
||
if from_unit == NPY_DATETIMEUNIT.NPY_FR_W: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might be worth exploring a iterative solution in the future for perf maybe?
e.g.
factor = 1
while from_unit != to_unit:
factor *= factor_map[from_unit] e.g. 7 (day to week)
from_unit = factor_map_step[from_unit] e.g. `NPY_DATETIMEUNIT.NPY_FR_W`
return factor
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have a branch where i've implemented what i expect would be the maximally-performant solution:
+cdef extern from *:
+ """
+ /*
+ # equiv:
+
+ def func(x, y):
+ if x == "B" or y == "B":
+ return -1
+ dt = np.datetime64(1, x)
+ return dt.astype(f"M8[{y}]").view("i8")
+
+ units = ["Y", "M", "W", "B", "D", "h", "m", "s", "ms", "us", "ns"]
+ mat = np.array([[func(x, y) for x in units] for y in units]).T
+ */
+
+ // must use npy typedef b/c int64_t is aliased in cython-generated c
+ // LL needed to avoid overflows
+ // see https://github.com/pandas-dev/pandas/pull/34416/
+ static npy_int64 npy_unit_conversion_matrix[11][11] = {
+ {1, 12, 52, -1, 365, 8760, 525600, 31536000, 31536000*1000LL, 31536000*1000000LL, 31536000*1000000000LL},
+ {0, 1, 4, -1, 31, 744, 44640, 2678400, 2678400*1000LL, 2678400*1000000LL, 2678400*1000000000LL},
+ {0, 0, 1, -1, 7, 168, 10080, 604800, 604800*1000LL, 604800*1000000LL, 604800*1000000000LL},
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // NPY_FR_B not used
+ {0, 0, 0, -1, 1, 24, 1440, 86400, 86400*1000, 86400*1000000LL, 86400*1000000000LL},
+ {0, 0, 0, -1, 0, 1, 60, 3600, 3600*1000, 3600*1000000LL, 3600*1000000000LL},
+ {0, 0, 0, -1, 0, 0, 1, 60, 60*1000, 60*1000000LL, 60*1000000000LL},
+ {0, 0, 0, -1, 0, 0, 0, 1, 1000, 1000000LL, 1000000000LL},
+ {0, 0, 0, -1, 0, 0, 0, 0, 1, 1000LL, 1000000LL},
+ {0, 0, 0, -1, 0, 0, 0, 0, 0, 1, 1000LL},
+ {0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 1LL},
+ };
+ """
+ int64_t npy_unit_conversion_matrix[11][11]
+
+
+cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1:
+ """
+ Find the factor by which we need to multiply to convert from from_unit to to_unit.
+ """
+ if (
+ from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
+ or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
+ ):
+ raise ValueError("unit-less resolutions are not supported")
+ #if (
+ # from_unit == NPY_DATETIMEUNIT.NPY_FR_B
+ # or to_unit == NPY_DATETIMEUNIT.NPY_FR_B
+ #):
+ # raise ValueError("NPY_FR_B resolution is not supported")
+
+ if from_unit > to_unit:
+ raise ValueError
+ if to_unit > NPY_DATETIMEUNIT.NPY_FR_ns or from_unit > NPY_DATETIMEUNIT.NPY_FR_ns:
+ # We don't bother with ps, fs, as
+ raise NotImplementedError(from_unit, to_unit)
+
+ return npy_unit_conversion_matrix[<Py_ssize_t>from_unit][<Py_ssize_t>to_unit]
but so far it doesn't appear to make much difference.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Gotcha. This current version is fairly clear already so IMO what's here in this PR is probably just worth keeping for the clarity
Moving it to np_datetime bc we'll end up using it in astype_overflowsafe