From fe9f43b872c2877ad8538829a7343d4979dc4482 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 3 Jan 2019 19:34:37 -0800 Subject: [PATCH 1/8] use is_null_datetimelike to get rid of redundant is_null_datelike_scalar --- pandas/_libs/missing.pyx | 10 +++++----- pandas/_libs/tslibs/__init__.py | 2 +- pandas/_libs/tslibs/nattype.pxd | 2 +- pandas/_libs/tslibs/nattype.pyx | 2 +- pandas/core/dtypes/missing.py | 18 +----------------- pandas/core/internals/blocks.py | 17 ++++++++--------- pandas/tests/tslibs/test_api.py | 1 + 7 files changed, 18 insertions(+), 34 deletions(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index c7f06bc5d7d4f..bb7d945bd190e 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -13,7 +13,7 @@ cimport pandas._libs.util as util from pandas._libs.tslibs.np_datetime cimport ( get_timedelta64_value, get_datetime64_value) from pandas._libs.tslibs.nattype cimport checknull_with_nat -from pandas._libs.tslibs.nattype import NaT +from pandas._libs.tslibs.nattype cimport c_NaT cdef float64_t INF = np.inf cdef float64_t NEGINF = -INF @@ -27,7 +27,7 @@ cdef inline bint _check_all_nulls(object val): if isinstance(val, (float, complex)): res = val != val - elif val is NaT: + elif val is c_NaT: res = 1 elif val is None: res = 1 @@ -67,7 +67,7 @@ cpdef bint checknull(object val): return val != val # and val != INF and val != NEGINF elif util.is_datetime64_object(val): return get_datetime64_value(val) == NPY_NAT - elif val is NaT: + elif val is c_NaT: return True elif util.is_timedelta64_object(val): return get_timedelta64_value(val) == NPY_NAT @@ -106,7 +106,7 @@ cpdef bint checknull_old(object val): return val != val or val == INF or val == NEGINF elif util.is_datetime64_object(val): return get_datetime64_value(val) == NPY_NAT - elif val is NaT: + elif val is c_NaT: return True elif util.is_timedelta64_object(val): return get_timedelta64_value(val) == NPY_NAT @@ -190,7 +190,7 @@ def isnaobj_old(ndarray arr): result = np.zeros(n, dtype=np.uint8) for i in range(n): val = arr[i] - result[i] = val is NaT or _check_none_nan_inf_neginf(val) + result[i] = val is c_NaT or _check_none_nan_inf_neginf(val) return result.view(np.bool_) diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index c7765a2c2b89c..38401cab57f5d 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -2,7 +2,7 @@ # flake8: noqa from .conversion import normalize_date, localize_pydatetime, tz_convert_single -from .nattype import NaT, iNaT +from .nattype import NaT, iNaT, is_null_datetimelike from .np_datetime import OutOfBoundsDatetime from .period import Period, IncompatibleFrequency from .timestamps import Timestamp diff --git a/pandas/_libs/tslibs/nattype.pxd b/pandas/_libs/tslibs/nattype.pxd index f649518e969be..ee8d5ca3d861c 100644 --- a/pandas/_libs/tslibs/nattype.pxd +++ b/pandas/_libs/tslibs/nattype.pxd @@ -17,4 +17,4 @@ cdef _NaT c_NaT cdef bint checknull_with_nat(object val) -cdef bint is_null_datetimelike(object val) +cpdef bint is_null_datetimelike(object val) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 604599f895476..df083f27ad653 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -686,7 +686,7 @@ cdef inline bint checknull_with_nat(object val): return val is None or util.is_nan(val) or val is c_NaT -cdef inline bint is_null_datetimelike(object val): +cpdef bint is_null_datetimelike(object val): """ Determine if we have a null for a timedelta/datetime (or integer versions) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index b22cb1050f140..fd94d233f4d14 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -10,7 +10,7 @@ _NS_DTYPE, _TD_DTYPE, ensure_object, is_bool_dtype, is_complex_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_datetimelike, is_datetimelike_v_numeric, is_dtype_equal, is_extension_array_dtype, - is_float_dtype, is_integer, is_integer_dtype, is_object_dtype, + is_float_dtype, is_integer_dtype, is_object_dtype, is_period_dtype, is_scalar, is_string_dtype, is_string_like_dtype, is_timedelta64_dtype, needs_i8_conversion, pandas_dtype) from .generic import ( @@ -339,22 +339,6 @@ def notna(obj): notnull = notna -def is_null_datelike_scalar(other): - """ test whether the object is a null datelike, e.g. Nat - but guard against passing a non-scalar """ - if other is NaT or other is None: - return True - elif is_scalar(other): - - # a timedelta - if hasattr(other, 'dtype'): - return other.view('i8') == iNaT - elif is_integer(other) and other == iNaT: - return True - return isna(other) - return False - - def _isna_compat(arr, fill_value=np.nan): """ Parameters diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 384676ede15f2..dbb30191878bd 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -8,7 +8,7 @@ import numpy as np from pandas._libs import internals as libinternals, lib, tslib, tslibs -from pandas._libs.tslibs import Timedelta, conversion +from pandas._libs.tslibs import Timedelta, conversion, is_null_datetimelike import pandas.compat as compat from pandas.compat import range, zip from pandas.util._validators import validate_bool_kwarg @@ -31,7 +31,7 @@ ABCDataFrame, ABCDatetimeIndex, ABCExtensionArray, ABCIndexClass, ABCSeries) from pandas.core.dtypes.missing import ( - _isna_compat, array_equivalent, is_null_datelike_scalar, isna, notna) + _isna_compat, array_equivalent, isna, notna) import pandas.core.algorithms as algos from pandas.core.arrays import ( @@ -2191,7 +2191,7 @@ def _try_coerce_args(self, values, other): if isinstance(other, bool): raise TypeError - elif is_null_datelike_scalar(other): + elif is_null_datetimelike(other): other = tslibs.iNaT elif isinstance(other, (datetime, np.datetime64, date)): other = self._box_func(other) @@ -2204,7 +2204,7 @@ def _try_coerce_args(self, values, other): else: # coercion issues # let higher levels handle - raise TypeError + raise TypeError(other) return values, other @@ -2393,8 +2393,7 @@ def _try_coerce_args(self, values, other): # add the tz back other = self._holder(other, dtype=self.dtype) - elif (is_null_datelike_scalar(other) or - (lib.is_scalar(other) and isna(other))): + elif is_null_datetimelike(other): other = tslibs.iNaT elif isinstance(other, self._holder): if other.tz != self.values.tz: @@ -2409,7 +2408,7 @@ def _try_coerce_args(self, values, other): raise ValueError("incompatible or non tz-aware value") other = other.value else: - raise TypeError + raise TypeError(other) return values, other @@ -2560,7 +2559,7 @@ def _try_coerce_args(self, values, other): if isinstance(other, bool): raise TypeError - elif is_null_datelike_scalar(other): + elif is_null_datetimelike(other): other = tslibs.iNaT elif isinstance(other, Timedelta): other = other.value @@ -2573,7 +2572,7 @@ def _try_coerce_args(self, values, other): else: # coercion issues # let higher levels handle - raise TypeError + raise TypeError(other) return values, other diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index fb9355dfed645..de937d1a4c526 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -23,6 +23,7 @@ def test_namespace(): api = ['NaT', 'iNaT', + 'is_null_datetimelike', 'OutOfBoundsDatetime', 'Period', 'IncompatibleFrequency', From a1ac5b55df7c8e206cc316e2296abf993a1d0054 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 3 Jan 2019 19:40:13 -0800 Subject: [PATCH 2/8] remove asi8, update import --- pandas/core/internals/blocks.py | 4 ---- pandas/core/sparse/series.py | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index dbb30191878bd..8c5a77f606e42 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2106,10 +2106,6 @@ def get_values(self, dtype=None): return values return self.values - @property - def asi8(self): - return self.values.view('i8') - class DatetimeBlock(DatetimeLikeBlockMixin, Block): __slots__ = () diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 4ea4531c53c72..db4d3e876dec5 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -16,9 +16,9 @@ from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution -from pandas.core.dtypes.common import is_scalar +from pandas.core.dtypes.common import is_integer, is_scalar from pandas.core.dtypes.generic import ABCSeries, ABCSparseSeries -from pandas.core.dtypes.missing import is_integer, isna, notna +from pandas.core.dtypes.missing import isna, notna from pandas.core import generic from pandas.core.arrays import SparseArray From 982f470289c55f9ff9190ef5a3b2bb479e0dae14 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 3 Jan 2019 19:43:43 -0800 Subject: [PATCH 3/8] cleanup import --- pandas/_libs/missing.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index bb7d945bd190e..e922a5d1c3b27 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -12,8 +12,7 @@ cimport pandas._libs.util as util from pandas._libs.tslibs.np_datetime cimport ( get_timedelta64_value, get_datetime64_value) -from pandas._libs.tslibs.nattype cimport checknull_with_nat -from pandas._libs.tslibs.nattype cimport c_NaT +from pandas._libs.tslibs.nattype cimport checknull_with_nat, c_NaT cdef float64_t INF = np.inf cdef float64_t NEGINF = -INF From b0499ba6967e924b1d9f81859c7180099814248d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 4 Jan 2019 06:43:13 -0800 Subject: [PATCH 4/8] dispatch try_coerce to holder.from_sequence --- pandas/core/internals/blocks.py | 46 +++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8c5a77f606e42..0e819ef87c208 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2207,13 +2207,17 @@ def _try_coerce_args(self, values, other): def _try_coerce_result(self, result): """ reverse of try_coerce_args """ if isinstance(result, np.ndarray): - if result.dtype.kind in ['i', 'f', 'O']: - try: - result = result.astype('M8[ns]') - except ValueError: - pass + try: + arr = self._holder._from_sequence(result.ravel()) + # TODO: tzawareness-compat check? + except ValueError: + pass + else: + result = arr._data.reshape(result.shape) + elif isinstance(result, (np.integer, np.float, np.datetime64)): result = self._box_func(result) + # TODO: np.float is kinda weird here. return result @property @@ -2275,6 +2279,8 @@ def __init__(self, values, placement, ndim=2, dtype=None): # and just use DatetimeBlock's. if dtype is not None: values = self._maybe_coerce_values(values, dtype=dtype) + # TODO: this gets hit in msgpack tests, but in all cases the values + # are already DatetimeArrays with the appropriate dtype super(DatetimeTZBlock, self).__init__(values, placement=placement, ndim=ndim) @@ -2386,6 +2392,7 @@ def _try_coerce_args(self, values, other): if isinstance(other, bool): raise TypeError elif is_datetime64_dtype(other): + # TODO: wont this catch np.datetime64 scalar? # add the tz back other = self._holder(other, dtype=self.dtype) @@ -2411,10 +2418,17 @@ def _try_coerce_args(self, values, other): def _try_coerce_result(self, result): """ reverse of try_coerce_args """ if isinstance(result, np.ndarray): - if result.dtype.kind in ['i', 'f', 'O']: - result = result.astype('M8[ns]') + try: + arr = self._holder._from_sequence(result.ravel()) + # TODO: pass own tz? tzawareness-compat? + except ValueError: + pass + else: + result = arr._data.reshape(result.shape) + elif isinstance(result, (np.integer, np.float, np.datetime64)): result = self._box_func(result) + if isinstance(result, np.ndarray): # allow passing of > 1dim if its trivial @@ -2557,11 +2571,7 @@ def _try_coerce_args(self, values, other): raise TypeError elif is_null_datetimelike(other): other = tslibs.iNaT - elif isinstance(other, Timedelta): - other = other.value - elif isinstance(other, timedelta): - other = Timedelta(other).value - elif isinstance(other, np.timedelta64): + elif isinstance(other, (timedelta, np.timedelta64)): other = Timedelta(other).value elif hasattr(other, 'dtype') and is_timedelta64_dtype(other): other = other.astype('i8', copy=False).view('i8') @@ -2575,12 +2585,16 @@ def _try_coerce_args(self, values, other): def _try_coerce_result(self, result): """ reverse of try_coerce_args / try_operate """ if isinstance(result, np.ndarray): - mask = isna(result) - if result.dtype.kind in ['i', 'f', 'O']: - result = result.astype('m8[ns]') - result[mask] = tslibs.iNaT + try: + arr = self._holder._from_sequence(result.ravel()) + except ValueError: + pass + else: + result = arr._data.reshape(result.shape) + elif isinstance(result, (np.integer, np.float)): result = self._box_func(result) + return result def should_store(self, value): From ba258762d6d1994253d2f40d75b4498634ae6dd5 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 4 Jan 2019 08:21:41 -0800 Subject: [PATCH 5/8] hold off on using from_sequence --- pandas/core/internals/blocks.py | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 5c797b1c72538..53ff0652e558c 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2178,17 +2178,11 @@ def _try_coerce_args(self, values, other): def _try_coerce_result(self, result): """ reverse of try_coerce_args """ if isinstance(result, np.ndarray): - try: - arr = self._holder._from_sequence(result.ravel()) - # TODO: tzawareness-compat check? - except ValueError: - pass - else: - result = arr._data.reshape(result.shape) + if result.dtype.kind in ['i', 'f']: + result = result.astype('M8[ns]') elif isinstance(result, (np.integer, np.float, np.datetime64)): result = self._box_func(result) - # TODO: np.float is kinda weird here. return result @property @@ -2389,13 +2383,8 @@ def _try_coerce_args(self, values, other): def _try_coerce_result(self, result): """ reverse of try_coerce_args """ if isinstance(result, np.ndarray): - try: - arr = self._holder._from_sequence(result.ravel()) - # TODO: pass own tz? tzawareness-compat? - except ValueError: - pass - else: - result = arr._data.reshape(result.shape) + if result.dtype.kind in ['i', 'f']: + result = result.astype('M8[ns]') elif isinstance(result, (np.integer, np.float, np.datetime64)): result = self._box_func(result) @@ -2556,12 +2545,10 @@ def _try_coerce_args(self, values, other): def _try_coerce_result(self, result): """ reverse of try_coerce_args / try_operate """ if isinstance(result, np.ndarray): - try: - arr = self._holder._from_sequence(result.ravel()) - except ValueError: - pass - else: - result = arr._data.reshape(result.shape) + mask = isna(result) + if result.dtype.kind in ['i', 'f']: + result = result.astype('m8[ns]') + result[mask] = tslibs.iNaT elif isinstance(result, (np.integer, np.float)): result = self._box_func(result) From 2577c9a5d5cce36e2c53491b5b9320e6bbd5b914 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 4 Jan 2019 08:29:45 -0800 Subject: [PATCH 6/8] remove comment --- pandas/core/internals/blocks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 53ff0652e558c..c11fa5e20882f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2357,7 +2357,6 @@ def _try_coerce_args(self, values, other): if isinstance(other, bool): raise TypeError elif is_datetime64_dtype(other): - # TODO: wont this catch np.datetime64 scalar? # add the tz back other = self._holder(other, dtype=self.dtype) From eb7ce62ee7dd4e14dbe39edcaec83874ee82babf Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 4 Jan 2019 09:07:15 -0800 Subject: [PATCH 7/8] isort fixup --- pandas/core/dtypes/missing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index fd94d233f4d14..3c6d3f212342b 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -10,9 +10,9 @@ _NS_DTYPE, _TD_DTYPE, ensure_object, is_bool_dtype, is_complex_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_datetimelike, is_datetimelike_v_numeric, is_dtype_equal, is_extension_array_dtype, - is_float_dtype, is_integer_dtype, is_object_dtype, - is_period_dtype, is_scalar, is_string_dtype, is_string_like_dtype, - is_timedelta64_dtype, needs_i8_conversion, pandas_dtype) + is_float_dtype, is_integer_dtype, is_object_dtype, is_period_dtype, + is_scalar, is_string_dtype, is_string_like_dtype, is_timedelta64_dtype, + needs_i8_conversion, pandas_dtype) from .generic import ( ABCDatetimeArray, ABCExtensionArray, ABCGeneric, ABCIndexClass, ABCMultiIndex, ABCSeries, ABCTimedeltaArray) From e1033e14dcb967757164ad6ef27d35cc23d8e673 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 4 Jan 2019 09:53:59 -0800 Subject: [PATCH 8/8] remove comment --- pandas/core/internals/blocks.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c11fa5e20882f..721215538af37 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2244,8 +2244,6 @@ def __init__(self, values, placement, ndim=2, dtype=None): # and just use DatetimeBlock's. if dtype is not None: values = self._maybe_coerce_values(values, dtype=dtype) - # TODO: this gets hit in msgpack tests, but in all cases the values - # are already DatetimeArrays with the appropriate dtype super(DatetimeTZBlock, self).__init__(values, placement=placement, ndim=ndim)