From ec179ec97b01a35006f8c5d349ef06177b4904fe Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 30 Nov 2018 16:35:32 -0800 Subject: [PATCH 1/4] implement ignore_errors_out_of_bounds_fallback --- pandas/_libs/tslib.pyx | 56 +++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 609608a0948c5..ee61ec46767c1 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -741,28 +741,50 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', if is_raise: raise - oresult = np.empty(n, dtype=object) - for i in range(n): - val = values[i] + return ignore_errors_out_of_bounds_fallback(values), tz_out - # set as nan except if its a NaT - if checknull_with_nat(val): - if isinstance(val, float): - oresult[i] = np.nan - else: - oresult[i] = NaT - elif is_datetime64_object(val): - if get_datetime64_value(val) == NPY_NAT: - oresult[i] = NaT - else: - oresult[i] = val.item() - else: - oresult[i] = val - return oresult, tz_out except TypeError: return array_to_datetime_object(values, is_raise, dayfirst, yearfirst) +cdef inline ignore_errors_out_of_bounds_fallback(ndarray[object] values): + """ + Fallback for array_to_datetime if an OutOfBoundsDatetime is raised + and errors == "ignore" + + Parameters + ---------- + values : ndarray[object] + + Returns + ------- + ndarray[object] + """ + cdef: + Py_ssize_t i, n = len(values) + object val + + oresult = np.empty(n, dtype=object) + + for i in range(n): + val = values[i] + + # set as nan except if its a NaT + if checknull_with_nat(val): + if isinstance(val, float): + oresult[i] = np.nan + else: + oresult[i] = NaT + elif is_datetime64_object(val): + if get_datetime64_value(val) == NPY_NAT: + oresult[i] = NaT + else: + oresult[i] = val.item() + else: + oresult[i] = val + return oresult + + @cython.wraparound(False) @cython.boundscheck(False) cdef array_to_datetime_object(ndarray[object] values, bint is_raise, From 7a84a259459deeb60d5115e0b1411113afc679cc Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 30 Nov 2018 16:37:09 -0800 Subject: [PATCH 2/4] move tz-alignment outside of loop --- pandas/_libs/tslib.pyx | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index ee61ec46767c1..0f76d27d417ae 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -722,21 +722,6 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', else: raise TypeError - if seen_datetime_offset and not utc_convert: - # GH 17697 - # 1) If all the offsets are equal, return one offset for - # the parsed dates to (maybe) pass to DatetimeIndex - # 2) If the offsets are different, then force the parsing down the - # object path where an array of datetimes - # (with individual dateutil.tzoffsets) are returned - is_same_offsets = len(out_tzoffset_vals) == 1 - if not is_same_offsets: - return array_to_datetime_object(values, is_raise, - dayfirst, yearfirst) - else: - tz_offset = out_tzoffset_vals.pop() - tz_out = pytz.FixedOffset(tz_offset / 60.) - return result, tz_out except OutOfBoundsDatetime: if is_raise: raise @@ -746,6 +731,22 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', except TypeError: return array_to_datetime_object(values, is_raise, dayfirst, yearfirst) + if seen_datetime_offset and not utc_convert: + # GH#17697 + # 1) If all the offsets are equal, return one offset for + # the parsed dates to (maybe) pass to DatetimeIndex + # 2) If the offsets are different, then force the parsing down the + # object path where an array of datetimes + # (with individual dateutil.tzoffsets) are returned + is_same_offsets = len(out_tzoffset_vals) == 1 + if not is_same_offsets: + return array_to_datetime_object(values, is_raise, + dayfirst, yearfirst) + else: + tz_offset = out_tzoffset_vals.pop() + tz_out = pytz.FixedOffset(tz_offset / 60.) + return result, tz_out + cdef inline ignore_errors_out_of_bounds_fallback(ndarray[object] values): """ From 9b31c010229e73c96e364c60c18e6a040b363ef8 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 30 Nov 2018 16:38:58 -0800 Subject: [PATCH 3/4] call func directly rather than using TypeError for flow control --- pandas/_libs/tslib.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 0f76d27d417ae..de6587e4287eb 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -720,7 +720,8 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', raise ValueError( "mixed datetimes and integers in passed array") else: - raise TypeError + return array_to_datetime_object(values, is_raise, + dayfirst, yearfirst) except OutOfBoundsDatetime: if is_raise: From 01e86614eed97771a65971467ffe36506df03a6f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 30 Nov 2018 18:05:35 -0800 Subject: [PATCH 4/4] move seen_datetime & seen_integer checks outside of try/except --- pandas/_libs/tslib.pyx | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index de6587e4287eb..f545e113949d9 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -520,9 +520,10 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', # specify error conditions assert is_raise or is_ignore or is_coerce + result = np.empty(n, dtype='M8[ns]') + iresult = result.view('i8') + try: - result = np.empty(n, dtype='M8[ns]') - iresult = result.view('i8') for i in range(n): val = values[i] @@ -706,23 +707,6 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', raise TypeError("{typ} is not convertible to datetime" .format(typ=type(val))) - if seen_datetime and seen_integer: - # we have mixed datetimes & integers - - if is_coerce: - # coerce all of the integers/floats to NaT, preserve - # the datetimes and other convertibles - for i in range(n): - val = values[i] - if is_integer_object(val) or is_float_object(val): - result[i] = NPY_NAT - elif is_raise: - raise ValueError( - "mixed datetimes and integers in passed array") - else: - return array_to_datetime_object(values, is_raise, - dayfirst, yearfirst) - except OutOfBoundsDatetime: if is_raise: raise @@ -732,6 +716,22 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', except TypeError: return array_to_datetime_object(values, is_raise, dayfirst, yearfirst) + if seen_datetime and seen_integer: + # we have mixed datetimes & integers + + if is_coerce: + # coerce all of the integers/floats to NaT, preserve + # the datetimes and other convertibles + for i in range(n): + val = values[i] + if is_integer_object(val) or is_float_object(val): + result[i] = NPY_NAT + elif is_raise: + raise ValueError("mixed datetimes and integers in passed array") + else: + return array_to_datetime_object(values, is_raise, + dayfirst, yearfirst) + if seen_datetime_offset and not utc_convert: # GH#17697 # 1) If all the offsets are equal, return one offset for