Skip to content

REF: combine isnaobj+isnaobj_old #44505

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/_libs/missing.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ from numpy cimport (

cpdef bint is_matching_na(object left, object right, bint nan_matches_none=*)

cpdef bint checknull(object val)
cpdef bint checknull(object val, bint inf_as_na=*)
cpdef bint checknull_old(object val)
cpdef ndarray[uint8_t] isnaobj(ndarray arr)
cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=*)

cdef bint is_null_datetime64(v)
cdef bint is_null_timedelta64(v)
Expand Down
134 changes: 11 additions & 123 deletions pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False
return False


cpdef bint checknull(object val):
cpdef bint checknull(object val, bint inf_as_na=False):
"""
Return boolean describing if the input is NA-like, defined here as any
of:
Expand All @@ -114,19 +114,16 @@ cpdef bint checknull(object val):
Parameters
----------
val : object
inf_as_na : bool, default False
Whether to treat INF and -INF as NA values.

Returns
-------
bool

Notes
-----
The difference between `checknull` and `checknull_old` is that `checknull`
does *not* consider INF or NEGINF to be NA.
"""
return (
val is C_NA
or is_null_datetimelike(val, inat_is_null=False)
or is_null_datetimelike(val, inat_is_null=False, inf_as_na=inf_as_na)
or is_decimal_na(val)
)

Expand All @@ -139,42 +136,12 @@ cdef inline bint is_decimal_na(object val):


cpdef bint checknull_old(object val):
"""
Return boolean describing of the input is NA-like, defined here as any
of:
- None
- nan
- INF
- NEGINF
- NaT
- np.datetime64 representation of NaT
- np.timedelta64 representation of NaT
- NA
- Decimal("NaN")

Parameters
----------
val : object

Returns
-------
result : bool

Notes
-----
The difference between `checknull` and `checknull_old` is that `checknull`
does *not* consider INF or NEGINF to be NA.
"""
if checknull(val):
return True
elif util.is_float_object(val) or util.is_complex_object(val):
return val == INF or val == NEGINF
return False
return checknull(val, inf_as_na=True)


@cython.wraparound(False)
@cython.boundscheck(False)
cpdef ndarray[uint8_t] isnaobj(ndarray arr):
cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=False):
"""
Return boolean mask denoting which elements of a 1-D array are na-like,
according to the criteria defined in `checknull`:
Expand Down Expand Up @@ -205,53 +172,19 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr):
result = np.empty(n, dtype=np.uint8)
for i in range(n):
val = arr[i]
result[i] = checknull(val)
result[i] = checknull(val, inf_as_na=inf_as_na)
return result.view(np.bool_)


@cython.wraparound(False)
@cython.boundscheck(False)
def isnaobj_old(arr: ndarray) -> ndarray:
"""
Return boolean mask denoting which elements of a 1-D array are na-like,
defined as being any of:
- None
- nan
- INF
- NEGINF
- NaT
- NA
- Decimal("NaN")

Parameters
----------
arr : ndarray

Returns
-------
result : ndarray (dtype=np.bool_)
"""
cdef:
Py_ssize_t i, n
object val
ndarray[uint8_t] result

assert arr.ndim == 1, "'arr' must be 1-D."

n = len(arr)
result = np.zeros(n, dtype=np.uint8)
for i in range(n):
val = arr[i]
result[i] = (
checknull(val)
or util.is_float_object(val) and (val == INF or val == NEGINF)
)
return result.view(np.bool_)
return isnaobj(arr, inf_as_na=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to just call isnaobj(arr, inf_as_na=True) where isnaobj_old(arr) is called (same for isnaobj2d)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is; I'm planning to do that in a separate pass.



@cython.wraparound(False)
@cython.boundscheck(False)
def isnaobj2d(arr: ndarray) -> ndarray:
def isnaobj2d(arr: ndarray, inf_as_na: bool = False) -> ndarray:
"""
Return boolean mask denoting which elements of a 2-D array are na-like,
according to the criteria defined in `checknull`:
Expand All @@ -270,11 +203,6 @@ def isnaobj2d(arr: ndarray) -> ndarray:
Returns
-------
result : ndarray (dtype=np.bool_)

Notes
-----
The difference between `isnaobj2d` and `isnaobj2d_old` is that `isnaobj2d`
does *not* consider INF or NEGINF to be NA.
"""
cdef:
Py_ssize_t i, j, n, m
Expand All @@ -288,55 +216,15 @@ def isnaobj2d(arr: ndarray) -> ndarray:
for i in range(n):
for j in range(m):
val = arr[i, j]
if checknull(val):
if checknull(val, inf_as_na=inf_as_na):
result[i, j] = 1
return result.view(np.bool_)


@cython.wraparound(False)
@cython.boundscheck(False)
def isnaobj2d_old(arr: ndarray) -> ndarray:
"""
Return boolean mask denoting which elements of a 2-D array are na-like,
according to the criteria defined in `checknull_old`:
- None
- nan
- INF
- NEGINF
- NaT
- np.datetime64 representation of NaT
- np.timedelta64 representation of NaT
- NA
- Decimal("NaN")

Parameters
----------
arr : ndarray

Returns
-------
ndarray (dtype=np.bool_)

Notes
-----
The difference between `isnaobj2d` and `isnaobj2d_old` is that `isnaobj2d`
does *not* consider INF or NEGINF to be NA.
"""
cdef:
Py_ssize_t i, j, n, m
object val
ndarray[uint8_t, ndim=2] result

assert arr.ndim == 2, "'arr' must be 2-D."

n, m = (<object>arr).shape
result = np.zeros((n, m), dtype=np.uint8)
for i in range(n):
for j in range(m):
val = arr[i, j]
if checknull_old(val):
result[i, j] = 1
return result.view(np.bool_)
return isnaobj2d(arr, inf_as_na=True)


def isposinf_scalar(val: object) -> bool:
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/nattype.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ cdef _NaT c_NaT
cdef bint checknull_with_nat(object val)
cdef bint is_dt64nat(object val)
cdef bint is_td64nat(object val)
cpdef bint is_null_datetimelike(object val, bint inat_is_null=*)
cpdef bint is_null_datetimelike(object val, bint inat_is_null=*, bint inf_as_na=*)
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/nattype.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ NaT: NaTType
iNaT: int
nat_strings: set[str]

def is_null_datetimelike(val: object, inat_is_null: bool = ...) -> bool: ...
def is_null_datetimelike(
val: object, inat_is_null: bool = ..., inf_as_na: bool = ...
) -> bool: ...

class NaTType(datetime):
value: np.int64
Expand Down
19 changes: 17 additions & 2 deletions pandas/_libs/tslibs/nattype.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1201,6 +1201,7 @@ cdef inline bint checknull_with_nat(object val):
"""
return val is None or util.is_nan(val) or val is c_NaT


cdef inline bint is_dt64nat(object val):
"""
Is this a np.datetime64 object np.datetime64("NaT").
Expand All @@ -1209,6 +1210,7 @@ cdef inline bint is_dt64nat(object val):
return get_datetime64_value(val) == NPY_NAT
return False


cdef inline bint is_td64nat(object val):
"""
Is this a np.timedelta64 object np.timedelta64("NaT").
Expand All @@ -1218,7 +1220,14 @@ cdef inline bint is_td64nat(object val):
return False


cpdef bint is_null_datetimelike(object val, bint inat_is_null=True):
cdef:
cnp.float64_t INF = <cnp.float64_t>np.inf
cnp.float64_t NEGINF = -INF


cpdef bint is_null_datetimelike(
object val, bint inat_is_null=True, bint inf_as_na=False
):
"""
Determine if we have a null for a timedelta/datetime (or integer versions).

Expand All @@ -1227,6 +1236,8 @@ cpdef bint is_null_datetimelike(object val, bint inat_is_null=True):
val : object
inat_is_null : bool, default True
Whether to treat integer iNaT value as null
inf_as_na : bool, default False
Whether to treat INF or -INF value as null.

Returns
-------
Expand All @@ -1237,7 +1248,11 @@ cpdef bint is_null_datetimelike(object val, bint inat_is_null=True):
elif val is c_NaT:
return True
elif util.is_float_object(val) or util.is_complex_object(val):
return val != val
if val != val:
return True
if inf_as_na:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be an `elif`, but it doesn't matter much.

return val == INF or val == NEGINF
return False
elif util.is_timedelta64_object(val):
return get_timedelta64_value(val) == NPY_NAT
elif util.is_datetime64_object(val):
Expand Down
7 changes: 2 additions & 5 deletions pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def _isna_array(values: ArrayLike, inf_as_na: bool = False):
if not isinstance(values, np.ndarray):
# i.e. ExtensionArray
if inf_as_na and is_categorical_dtype(dtype):
result = libmissing.isnaobj_old(values.to_numpy())
result = libmissing.isnaobj(values.to_numpy(), inf_as_na=inf_as_na)
else:
result = values.isna()
elif is_string_dtype(dtype):
Expand All @@ -268,10 +268,7 @@ def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> np.ndarray:
result = np.zeros(values.shape, dtype=bool)
else:
result = np.empty(shape, dtype=bool)
if inf_as_na:
vec = libmissing.isnaobj_old(values.ravel())
else:
vec = libmissing.isnaobj(values.ravel())
vec = libmissing.isnaobj(values.ravel(), inf_as_na=inf_as_na)

result[...] = vec.reshape(shape)

Expand Down