From 0a199e613edcb7a527ec85b8ec2e563cbdf6bdf2 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 Nov 2021 16:11:06 -0800 Subject: [PATCH 1/2] REF: combine isnaobj+isnaobj_old --- pandas/_libs/missing.pxd | 4 +- pandas/_libs/missing.pyx | 133 +++----------------------------- pandas/_libs/tslibs/nattype.pxd | 2 +- pandas/_libs/tslibs/nattype.pyi | 4 +- pandas/_libs/tslibs/nattype.pyx | 17 +++- pandas/core/dtypes/missing.py | 7 +- 6 files changed, 34 insertions(+), 133 deletions(-) diff --git a/pandas/_libs/missing.pxd b/pandas/_libs/missing.pxd index 9d32fcd3625db..e32518864db0a 100644 --- a/pandas/_libs/missing.pxd +++ b/pandas/_libs/missing.pxd @@ -6,9 +6,9 @@ from numpy cimport ( cpdef bint is_matching_na(object left, object right, bint nan_matches_none=*) -cpdef bint checknull(object val) +cpdef bint checknull(object val, bint inf_as_na=*) cpdef bint checknull_old(object val) -cpdef ndarray[uint8_t] isnaobj(ndarray arr) +cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=*) cdef bint is_null_datetime64(v) cdef bint is_null_timedelta64(v) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index b77db2aec4a08..d36e90c37e0ac 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -99,7 +99,7 @@ cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False return False -cpdef bint checknull(object val): +cpdef bint checknull(object val, bint inf_as_na=False): """ Return boolean describing of the input is NA-like, defined here as any of: @@ -114,19 +114,16 @@ cpdef bint checknull(object val): Parameters ---------- val : object + inf_as_na : bool, default False + Whether to treat INF and -INF as NA values. Returns ------- bool - - Notes - ----- - The difference between `checknull` and `checknull_old` is that `checknull` - does *not* consider INF or NEGINF to be NA. """ return ( val is C_NA - or is_null_datetimelike(val, inat_is_null=False) + or is_null_datetimelike(val, inat_is_null=False, inf_as_na=inf_as_na) or is_decimal_na(val) ) @@ -139,42 +136,13 @@ cdef inline bint is_decimal_na(object val): cpdef bint checknull_old(object val): - """ - Return boolean describing of the input is NA-like, defined here as any - of: - - None - - nan - - INF - - NEGINF - - NaT - - np.datetime64 representation of NaT - - np.timedelta64 representation of NaT - - NA - - Decimal("NaN") + return checknull(val, inf_as_na=True) - Parameters - ---------- - val : object - - Returns - ------- - result : bool - - Notes - ----- - The difference between `checknull` and `checknull_old` is that `checknull` - does *not* consider INF or NEGINF to be NA. - """ - if checknull(val): - return True - elif util.is_float_object(val) or util.is_complex_object(val): - return val == INF or val == NEGINF - return False @cython.wraparound(False) @cython.boundscheck(False) -cpdef ndarray[uint8_t] isnaobj(ndarray arr): +cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=False): """ Return boolean mask denoting which elements of a 1-D array are na-like, according to the criteria defined in `checknull`: @@ -205,53 +173,19 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr): result = np.empty(n, dtype=np.uint8) for i in range(n): val = arr[i] - result[i] = checknull(val) + result[i] = checknull(val, inf_as_na=inf_as_na) return result.view(np.bool_) @cython.wraparound(False) @cython.boundscheck(False) def isnaobj_old(arr: ndarray) -> ndarray: - """ - Return boolean mask denoting which elements of a 1-D array are na-like, - defined as being any of: - - None - - nan - - INF - - NEGINF - - NaT - - NA - - Decimal("NaN") - - Parameters - ---------- - arr : ndarray - - Returns - ------- - result : ndarray (dtype=np.bool_) - """ - cdef: - Py_ssize_t i, n - object val - ndarray[uint8_t] result - - assert arr.ndim == 1, "'arr' must be 1-D." - - n = len(arr) - result = np.zeros(n, dtype=np.uint8) - for i in range(n): - val = arr[i] - result[i] = ( - checknull(val) - or util.is_float_object(val) and (val == INF or val == NEGINF) - ) - return result.view(np.bool_) + return isnaobj(arr, inf_as_na=True) @cython.wraparound(False) @cython.boundscheck(False) -def isnaobj2d(arr: ndarray) -> ndarray: +def isnaobj2d(arr: ndarray, inf_as_na: bool = False) -> ndarray: """ Return boolean mask denoting which elements of a 2-D array are na-like, according to the criteria defined in `checknull`: @@ -270,11 +204,6 @@ def isnaobj2d(arr: ndarray) -> ndarray: Returns ------- result : ndarray (dtype=np.bool_) - - Notes - ----- - The difference between `isnaobj2d` and `isnaobj2d_old` is that `isnaobj2d` - does *not* consider INF or NEGINF to be NA. """ cdef: Py_ssize_t i, j, n, m @@ -288,7 +217,7 @@ def isnaobj2d(arr: ndarray) -> ndarray: for i in range(n): for j in range(m): val = arr[i, j] - if checknull(val): + if checknull(val, inf_as_na=inf_as_na): result[i, j] = 1 return result.view(np.bool_) @@ -296,47 +225,7 @@ def isnaobj2d(arr: ndarray) -> ndarray: @cython.wraparound(False) @cython.boundscheck(False) def isnaobj2d_old(arr: ndarray) -> ndarray: - """ - Return boolean mask denoting which elements of a 2-D array are na-like, - according to the criteria defined in `checknull_old`: - - None - - nan - - INF - - NEGINF - - NaT - - np.datetime64 representation of NaT - - np.timedelta64 representation of NaT - - NA - - Decimal("NaN") - - Parameters - ---------- - arr : ndarray - - Returns - ------- - ndarray (dtype=np.bool_) - - Notes - ----- - The difference between `isnaobj2d` and `isnaobj2d_old` is that `isnaobj2d` - does *not* consider INF or NEGINF to be NA. - """ - cdef: - Py_ssize_t i, j, n, m - object val - ndarray[uint8_t, ndim=2] result - - assert arr.ndim == 2, "'arr' must be 2-D." - - n, m = (arr).shape - result = np.zeros((n, m), dtype=np.uint8) - for i in range(n): - for j in range(m): - val = arr[i, j] - if checknull_old(val): - result[i, j] = 1 - return result.view(np.bool_) + return isnaobj2d(arr, inf_as_na=True) def isposinf_scalar(val: object) -> bool: diff --git a/pandas/_libs/tslibs/nattype.pxd b/pandas/_libs/tslibs/nattype.pxd index 35319bd88053a..0ace3ca1fd4b1 100644 --- a/pandas/_libs/tslibs/nattype.pxd +++ b/pandas/_libs/tslibs/nattype.pxd @@ -18,4 +18,4 @@ cdef _NaT c_NaT cdef bint checknull_with_nat(object val) cdef bint is_dt64nat(object val) cdef bint is_td64nat(object val) -cpdef bint is_null_datetimelike(object val, bint inat_is_null=*) +cpdef bint is_null_datetimelike(object val, bint inat_is_null=*, bint inf_as_na=*) diff --git a/pandas/_libs/tslibs/nattype.pyi b/pandas/_libs/tslibs/nattype.pyi index a7ee9a70342d4..1a33a85a04ae0 100644 --- a/pandas/_libs/tslibs/nattype.pyi +++ b/pandas/_libs/tslibs/nattype.pyi @@ -12,7 +12,9 @@ NaT: NaTType iNaT: int nat_strings: set[str] -def is_null_datetimelike(val: object, inat_is_null: bool = ...) -> bool: ... +def is_null_datetimelike( + val: object, inat_is_null: bool = ..., inf_as_na: bool = ... +) -> bool: ... class NaTType(datetime): value: np.int64 diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 0cbae74ecadac..716fe60beb0b5 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -1201,6 +1201,7 @@ cdef inline bint checknull_with_nat(object val): """ return val is None or util.is_nan(val) or val is c_NaT + cdef inline bint is_dt64nat(object val): """ Is this a np.datetime64 object np.datetime64("NaT"). @@ -1209,6 +1210,7 @@ cdef inline bint is_dt64nat(object val): return get_datetime64_value(val) == NPY_NAT return False + cdef inline bint is_td64nat(object val): """ Is this a np.timedelta64 object np.timedelta64("NaT"). @@ -1218,7 +1220,12 @@ cdef inline bint is_td64nat(object val): return False -cpdef bint is_null_datetimelike(object val, bint inat_is_null=True): +cdef: + cnp.float64_t INF = np.inf + cnp.float64_t NEGINF = -INF + + +cpdef bint is_null_datetimelike(object val, bint inat_is_null=True, bint inf_as_na=False): """ Determine if we have a null for a timedelta/datetime (or integer versions). @@ -1227,6 +1234,8 @@ cpdef bint is_null_datetimelike(object val, bint inat_is_null=True): val : object inat_is_null : bool, default True Whether to treat integer iNaT value as null + inf_as_na : bool, default False + Whether to treat INF or -INF value as null. Returns ------- @@ -1237,7 +1246,11 @@ cpdef bint is_null_datetimelike(object val, bint inat_is_null=True): elif val is c_NaT: return True elif util.is_float_object(val) or util.is_complex_object(val): - return val != val + if val != val: + return True + if inf_as_na: + return val == INF or val == NEGINF + return False elif util.is_timedelta64_object(val): return get_timedelta64_value(val) == NPY_NAT elif util.is_datetime64_object(val): diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index eea3fa37b7435..f5f03279a875d 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -243,7 +243,7 @@ def _isna_array(values: ArrayLike, inf_as_na: bool = False): if not isinstance(values, np.ndarray): # i.e. ExtensionArray if inf_as_na and is_categorical_dtype(dtype): - result = libmissing.isnaobj_old(values.to_numpy()) + result = libmissing.isnaobj(values.to_numpy(), inf_as_na=inf_as_na) else: result = values.isna() elif is_string_dtype(dtype): @@ -269,10 +269,7 @@ def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> np.ndarray: result = np.zeros(values.shape, dtype=bool) else: result = np.empty(shape, dtype=bool) - if inf_as_na: - vec = libmissing.isnaobj_old(values.ravel()) - else: - vec = libmissing.isnaobj(values.ravel()) + vec = libmissing.isnaobj(values.ravel(), inf_as_na=inf_as_na) result[...] = vec.reshape(shape) From 9d14b1a1fe84eaa59d6eeb4bd869f1e0f7508820 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 Nov 2021 16:34:40 -0800 Subject: [PATCH 2/2] lint fixup --- pandas/_libs/missing.pyx | 1 - pandas/_libs/tslibs/nattype.pyx | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index d36e90c37e0ac..6146e8ea13f89 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -139,7 +139,6 @@ cpdef bint checknull_old(object val): return checknull(val, inf_as_na=True) - @cython.wraparound(False) @cython.boundscheck(False) cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=False): diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 716fe60beb0b5..ae553d79ae91e 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -1225,7 +1225,9 @@ cdef: cnp.float64_t NEGINF = -INF -cpdef bint is_null_datetimelike(object val, bint inat_is_null=True, bint inf_as_na=False): +cpdef bint is_null_datetimelike( + object val, bint inat_is_null=True, bint inf_as_na=False +): """ Determine if we have a null for a timedelta/datetime (or integer versions).