Skip to content

Commit 4b4bf72

Browse files
jbrockmendel authored and jreback committed
catch complex nan in util.is_nan, de-dup+optimize libmissing, tests (#24628)
1 parent eace81c commit 4b4bf72

File tree

5 files changed

+133
-57
lines changed

5 files changed

+133
-57
lines changed

pandas/_libs/missing.pyx

Lines changed: 13 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -12,33 +12,16 @@ cimport pandas._libs.util as util
1212

1313
from pandas._libs.tslibs.np_datetime cimport (
1414
get_timedelta64_value, get_datetime64_value)
15-
from pandas._libs.tslibs.nattype cimport checknull_with_nat, c_NaT
15+
from pandas._libs.tslibs.nattype cimport (
16+
checknull_with_nat, c_NaT as NaT, is_null_datetimelike)
17+
1618

1719
cdef float64_t INF = <float64_t>np.inf
1820
cdef float64_t NEGINF = -INF
1921

2022
cdef int64_t NPY_NAT = util.get_nat()
2123

2224

23-
cdef inline bint _check_all_nulls(object val):
24-
""" utility to check if a value is any type of null """
25-
res: bint
26-
27-
if isinstance(val, (float, complex)):
28-
res = val != val
29-
elif val is c_NaT:
30-
res = 1
31-
elif val is None:
32-
res = 1
33-
elif util.is_datetime64_object(val):
34-
res = get_datetime64_value(val) == NPY_NAT
35-
elif util.is_timedelta64_object(val):
36-
res = get_timedelta64_value(val) == NPY_NAT
37-
else:
38-
res = 0
39-
return res
40-
41-
4225
cpdef bint checknull(object val):
4326
"""
4427
Return boolean describing whether the input is NA-like, defined here as any
@@ -62,18 +45,7 @@ cpdef bint checknull(object val):
6245
The difference between `checknull` and `checknull_old` is that `checknull`
6346
does *not* consider INF or NEGINF to be NA.
6447
"""
65-
if util.is_float_object(val) or util.is_complex_object(val):
66-
return val != val # and val != INF and val != NEGINF
67-
elif util.is_datetime64_object(val):
68-
return get_datetime64_value(val) == NPY_NAT
69-
elif val is c_NaT:
70-
return True
71-
elif util.is_timedelta64_object(val):
72-
return get_timedelta64_value(val) == NPY_NAT
73-
elif util.is_array(val):
74-
return False
75-
else:
76-
return val is None or util.is_nan(val)
48+
return is_null_datetimelike(val, inat_is_null=False)
7749

7850

7951
cpdef bint checknull_old(object val):
@@ -101,18 +73,11 @@ cpdef bint checknull_old(object val):
10173
The difference between `checknull` and `checknull_old` is that `checknull`
10274
does *not* consider INF or NEGINF to be NA.
10375
"""
104-
if util.is_float_object(val) or util.is_complex_object(val):
105-
return val != val or val == INF or val == NEGINF
106-
elif util.is_datetime64_object(val):
107-
return get_datetime64_value(val) == NPY_NAT
108-
elif val is c_NaT:
76+
if checknull(val):
10977
return True
110-
elif util.is_timedelta64_object(val):
111-
return get_timedelta64_value(val) == NPY_NAT
112-
elif util.is_array(val):
113-
return False
114-
else:
115-
return val is None or util.is_nan(val)
78+
elif util.is_float_object(val) or util.is_complex_object(val):
79+
return val == INF or val == NEGINF
80+
return False
11681

11782

11883
cdef inline bint _check_none_nan_inf_neginf(object val):
@@ -128,7 +93,7 @@ cdef inline bint _check_none_nan_inf_neginf(object val):
12893
cpdef ndarray[uint8_t] isnaobj(ndarray arr):
12994
"""
13095
Return boolean mask denoting which elements of a 1-D array are na-like,
131-
according to the criteria defined in `_check_all_nulls`:
96+
according to the criteria defined in `checknull`:
13297
- None
13398
- nan
13499
- NaT
@@ -154,7 +119,7 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr):
154119
result = np.empty(n, dtype=np.uint8)
155120
for i in range(n):
156121
val = arr[i]
157-
result[i] = _check_all_nulls(val)
122+
result[i] = checknull(val)
158123
return result.view(np.bool_)
159124

160125

@@ -189,7 +154,7 @@ def isnaobj_old(ndarray arr):
189154
result = np.zeros(n, dtype=np.uint8)
190155
for i in range(n):
191156
val = arr[i]
192-
result[i] = val is c_NaT or _check_none_nan_inf_neginf(val)
157+
result[i] = val is NaT or _check_none_nan_inf_neginf(val)
193158
return result.view(np.bool_)
194159

195160

@@ -299,7 +264,7 @@ cdef inline bint is_null_datetime64(v):
299264
if checknull_with_nat(v):
300265
return True
301266
elif util.is_datetime64_object(v):
302-
return v.view('int64') == NPY_NAT
267+
return get_datetime64_value(v) == NPY_NAT
303268
return False
304269

305270

@@ -309,7 +274,7 @@ cdef inline bint is_null_timedelta64(v):
309274
if checknull_with_nat(v):
310275
return True
311276
elif util.is_timedelta64_object(v):
312-
return v.view('int64') == NPY_NAT
277+
return get_timedelta64_value(v) == NPY_NAT
313278
return False
314279

315280

pandas/_libs/tslibs/nattype.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,4 @@ cdef _NaT c_NaT
1717

1818

1919
cdef bint checknull_with_nat(object val)
20-
cpdef bint is_null_datetimelike(object val)
20+
cpdef bint is_null_datetimelike(object val, bint inat_is_null=*)

pandas/_libs/tslibs/nattype.pyx

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ cimport numpy as cnp
1414
from numpy cimport int64_t
1515
cnp.import_array()
1616

17+
from pandas._libs.tslibs.np_datetime cimport (
18+
get_datetime64_value, get_timedelta64_value)
1719
cimport pandas._libs.tslibs.util as util
1820
from pandas._libs.tslibs.util cimport (
1921
get_nat, is_integer_object, is_float_object, is_datetime64_object,
@@ -686,26 +688,30 @@ cdef inline bint checknull_with_nat(object val):
686688
return val is None or util.is_nan(val) or val is c_NaT
687689

688690

689-
cpdef bint is_null_datetimelike(object val):
691+
cpdef bint is_null_datetimelike(object val, bint inat_is_null=True):
690692
"""
691693
Determine if we have a null for a timedelta/datetime (or integer versions)
692694
693695
Parameters
694696
----------
695697
val : object
698+
inat_is_null : bool, default True
699+
Whether to treat integer iNaT value as null
696700
697701
Returns
698702
-------
699703
null_datetimelike : bool
700704
"""
701-
if val is None or util.is_nan(val):
705+
if val is None:
702706
return True
703707
elif val is c_NaT:
704708
return True
709+
elif util.is_float_object(val) or util.is_complex_object(val):
710+
return val != val
705711
elif util.is_timedelta64_object(val):
706-
return val.view('int64') == NPY_NAT
712+
return get_timedelta64_value(val) == NPY_NAT
707713
elif util.is_datetime64_object(val):
708-
return val.view('int64') == NPY_NAT
709-
elif util.is_integer_object(val):
714+
return get_datetime64_value(val) == NPY_NAT
715+
elif inat_is_null and util.is_integer_object(val):
710716
return val == NPY_NAT
711717
return False

pandas/_libs/tslibs/util.pxd

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,8 @@ cdef inline bint is_offset_object(object val):
215215

216216
cdef inline bint is_nan(object val):
217217
"""
218-
Check if val is a Not-A-Number float, including float('NaN') and np.nan.
218+
Check if val is a Not-A-Number float or complex, including
219+
float('NaN') and np.nan.
219220
220221
Parameters
221222
----------
@@ -225,4 +226,4 @@ cdef inline bint is_nan(object val):
225226
-------
226227
is_nan : bool
227228
"""
228-
return is_float_object(val) and val != val
229+
return (is_float_object(val) or is_complex_object(val)) and val != val

pandas/tests/dtypes/test_missing.py

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
# -*- coding: utf-8 -*-
22

33
from datetime import datetime
4+
from decimal import Decimal
45
from warnings import catch_warnings, filterwarnings, simplefilter
56

67
import numpy as np
78
import pytest
89

910
from pandas._libs import missing as libmissing
10-
from pandas._libs.tslib import iNaT
11+
from pandas._libs.tslibs import iNaT, is_null_datetimelike
1112
from pandas.compat import u
1213

1314
from pandas.core.dtypes.common import is_scalar
@@ -392,3 +393,106 @@ def test_empty_like(self):
392393
expected = np.array([True])
393394

394395
self._check_behavior(arr, expected)
396+
397+
398+
# Unit codes used to build unit-specific datetime64/timedelta64 NaT scalars.
m8_units = ['as', 'ps', 'ns', 'us', 'ms', 's',
            'm', 'h', 'D', 'W', 'M', 'Y']

# Scalars that represent a missing value: None, pandas NaT, float/complex
# NaN in both builtin and numpy flavours, and numpy NaT with and without
# an explicit unit.
na_vals = [
    None,
    NaT,
    float('NaN'),
    complex('NaN'),
    np.nan,
    np.float64('NaN'),
    np.float32('NaN'),
    np.complex64(np.nan),
    np.complex128(np.nan),
    np.datetime64('NaT'),
    np.timedelta64('NaT'),
] + [
    np.datetime64('NaT', unit) for unit in m8_units
] + [
    np.timedelta64('NaT', unit) for unit in m8_units
]

# Positive and negative infinities, as builtin float/complex and as the
# numpy constant.
inf_vals = [
    float('inf'),
    float('-inf'),
    complex('inf'),
    complex('-inf'),
    np.inf,
    # ``np.NINF`` was removed in NumPy 2.0; ``-np.inf`` is the same value
    # and works on every NumPy version.
    -np.inf,
]

int_na_vals = [
    # Values that match iNaT, which we treat as null in specific cases
    np.int64(NaT.value),
    int(NaT.value),
]

# A NaN that is neither a float nor a complex object.
sometimes_na_vals = [
    Decimal('NaN'),
]

never_na_vals = [
    # float/complex values that when viewed as int64 match iNaT
    -0.0,
    np.float64('-0.0'),
    -0j,
    np.complex64(-0j),
]
445+
446+
447+
class TestLibMissing(object):
    """Check the scalar null-detection helpers against value fixtures.

    Every test walks the module-level lists ``na_vals``, ``inf_vals``,
    ``int_na_vals``, ``sometimes_na_vals`` and ``never_na_vals`` and
    asserts how the helper under test classifies each value.
    """

    def test_checknull(self):
        # Only the values in na_vals count as null for checknull.
        for value in na_vals:
            assert libmissing.checknull(value)

        for value in inf_vals:
            assert not libmissing.checknull(value)

        for value in int_na_vals:
            assert not libmissing.checknull(value)

        for value in sometimes_na_vals:
            assert not libmissing.checknull(value)

        for value in never_na_vals:
            assert not libmissing.checknull(value)

    def test_checknull_old(self):
        # The ``test_`` prefix is required: without it pytest does not
        # collect the method and these assertions never run.
        for value in na_vals:
            assert libmissing.checknull_old(value)

        # Unlike checknull, checknull_old also treats infinities as null.
        for value in inf_vals:
            assert libmissing.checknull_old(value)

        for value in int_na_vals:
            assert not libmissing.checknull_old(value)

        for value in sometimes_na_vals:
            assert not libmissing.checknull_old(value)

        for value in never_na_vals:
            assert not libmissing.checknull_old(value)

    def test_is_null_datetimelike(self):
        # The second positional argument is ``inat_is_null``; each value is
        # checked with the default (True) and with False.
        for value in na_vals:
            assert is_null_datetimelike(value)
            assert is_null_datetimelike(value, False)

        for value in inf_vals:
            assert not is_null_datetimelike(value)
            assert not is_null_datetimelike(value, False)

        # Integer iNaT is null only when inat_is_null is True.
        for value in int_na_vals:
            assert is_null_datetimelike(value)
            assert not is_null_datetimelike(value, False)

        for value in sometimes_na_vals:
            assert not is_null_datetimelike(value)
            assert not is_null_datetimelike(value, False)

        for value in never_na_vals:
            assert not is_null_datetimelike(value)
            assert not is_null_datetimelike(value, False)

0 commit comments

Comments
 (0)