-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: add NA scalar for missing value indicator, use in StringArray. #29597
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 17 commits
03f83bd
c1797d5
3339eaa
e9d4d6a
4450d2d
1849a23
c72e3ee
3a97782
2302661
2ab592a
018399e
31290b9
33fd3e0
289c885
22de7cd
f8208db
371eeeb
1cadeda
1fcf4b7
f6798e5
14c1434
788a2c2
1bcbab2
775cdfb
589a961
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -68,6 +68,7 @@ | |
DatetimeTZDtype, | ||
StringDtype, | ||
# missing | ||
NA, | ||
isna, | ||
isnull, | ||
notna, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
import cython | ||
from cython import Py_ssize_t | ||
|
||
import numbers | ||
|
||
import numpy as np | ||
cimport numpy as cnp | ||
from numpy cimport ndarray, int64_t, uint8_t, float64_t | ||
|
@@ -44,7 +46,7 @@ cpdef bint checknull(object val): | |
The difference between `checknull` and `checknull_old` is that `checknull` | ||
does *not* consider INF or NEGINF to be NA. | ||
""" | ||
return is_null_datetimelike(val, inat_is_null=False) | ||
return val is C_NA or is_null_datetimelike(val, inat_is_null=False) | ||
|
||
|
||
cpdef bint checknull_old(object val): | ||
|
@@ -278,3 +280,137 @@ cdef inline bint is_null_period(v): | |
# determine if we have a null for a Period (or integer versions), | ||
# excluding np.datetime64('nat') and np.timedelta64('nat') | ||
return checknull_with_nat(v) | ||
|
||
|
||
# ----------------------------------------------------------------------------- | ||
# Implementation of NA singleton | ||
|
||
|
||
def _create_binary_propagating_op(name, divmod=False):
    """
    Build a binary dunder method for NA that propagates NA.

    Parameters
    ----------
    name : str
        Assigned to the ``__name__`` of the returned function.
    divmod : bool, default False
        If True, the generated method returns the pair ``(NA, NA)``
        instead of a single ``NA``.

    Returns
    -------
    callable
        ``method(self, other)``, which returns ``NA`` (or ``(NA, NA)``) when
        ``other`` is NA, a ``str`` or a ``numbers.Number``, and
        ``NotImplemented`` for anything else.
    """

    def method(self, other):
        # Cheapest checks first: identity, then a concrete type. The
        # numbers.Number ABC check goes last because it is the least
        # performant of the three (requested in review).
        if (
            other is NA
            or isinstance(other, str)
            or isinstance(other, numbers.Number)
        ):
            if divmod:
                return NA, NA
            else:
                return NA

        # Unsupported operand: let Python try the reflected operation.
        return NotImplemented

    method.__name__ = name
    return method
|
||
|
||
def _create_unary_propagating_op(name):
    """
    Build a unary dunder method for NA that always returns NA.

    Parameters
    ----------
    name : str
        Assigned to the ``__name__`` of the returned function.

    Returns
    -------
    callable
        ``method(self)``, which ignores its operand and returns ``NA``.
    """

    def method(self):
        return NA

    method.__name__ = name

    return method
|
||
|
||
# Empty cdef base class for NAType (the Python-level class that defines all
# of the NA behaviour inherits from it).
cdef class C_NAType:
    pass
|
||
|
||
class NAType(C_NAType):
    """
    NA ("not available") missing value indicator.

    .. warning::

       Experimental: the behaviour of NA can still change without warning.

    .. versionadded:: 1.0.0

    The NA singleton is a missing value indicator defined by pandas. It is
    used in certain new extension dtypes (currently the "string" dtype).
    """

    # Holds the one instance once it has been created by __new__.
    _instance = None

    def __new__(cls, *args, **kwargs):
        # Singleton: build the instance on first use, then always hand back
        # that same object.
        instance = NAType._instance
        if instance is None:
            instance = C_NAType.__new__(cls, *args, **kwargs)
            NAType._instance = instance
        return instance

    def __repr__(self) -> str:
        return "NA"

    def __str__(self) -> str:
        return "NA"

    def __bool__(self):
        # The truth value of a missing value is undefined, so refuse to guess.
        raise TypeError("boolean value of NA is ambiguous")

    def __hash__(self):
        # __eq__ is overridden below, so the hash is defined explicitly.
        return id(self)

    # Binary arithmetic and comparison ops -> propagate

    __add__ = _create_binary_propagating_op("__add__")
    __radd__ = _create_binary_propagating_op("__radd__")
    __sub__ = _create_binary_propagating_op("__sub__")
    __rsub__ = _create_binary_propagating_op("__rsub__")
    __mul__ = _create_binary_propagating_op("__mul__")
    __rmul__ = _create_binary_propagating_op("__rmul__")
    __matmul__ = _create_binary_propagating_op("__matmul__")
    __rmatmul__ = _create_binary_propagating_op("__rmatmul__")
    __truediv__ = _create_binary_propagating_op("__truediv__")
    __rtruediv__ = _create_binary_propagating_op("__rtruediv__")
    __floordiv__ = _create_binary_propagating_op("__floordiv__")
    __rfloordiv__ = _create_binary_propagating_op("__rfloordiv__")
    __mod__ = _create_binary_propagating_op("__mod__")
    __rmod__ = _create_binary_propagating_op("__rmod__")
    __divmod__ = _create_binary_propagating_op("__divmod__", divmod=True)
    __rdivmod__ = _create_binary_propagating_op("__rdivmod__", divmod=True)
    __pow__ = _create_binary_propagating_op("__pow__")
    __rpow__ = _create_binary_propagating_op("__rpow__")
    # __lshift__ and __rshift__ are not implemented

    __eq__ = _create_binary_propagating_op("__eq__")
    __ne__ = _create_binary_propagating_op("__ne__")
    __le__ = _create_binary_propagating_op("__le__")
    __lt__ = _create_binary_propagating_op("__lt__")
    __gt__ = _create_binary_propagating_op("__gt__")
    __ge__ = _create_binary_propagating_op("__ge__")

    # Unary ops

    __neg__ = _create_unary_propagating_op("__neg__")
    __pos__ = _create_unary_propagating_op("__pos__")
    __abs__ = _create_unary_propagating_op("__abs__")
    __invert__ = _create_unary_propagating_op("__invert__")

    # Logical ops using Kleene logic

    def __and__(self, other):
        # NA & False is False whatever NA stands for; NA & True and
        # NA & NA stay unknown.
        if other is False:
            return False
        if other is True or other is NA:
            return NA
        return NotImplemented

    __rand__ = __and__

    def __or__(self, other):
        # NA | True is True whatever NA stands for; NA | False and
        # NA | NA stay unknown.
        if other is True:
            return True
        if other is False or other is NA:
            return NA
        return NotImplemented

    __ror__ = __or__

    def __xor__(self, other):
        # XOR always depends on both operands, so the result is unknown for
        # every supported operand.
        if other is NA or other is True or other is False:
            return NA
        return NotImplemented

    __rxor__ = __xor__
|
||
|
||
# Create the single NA instance and bind it to two module-level names:
# C_NA is the name compared against in checknull (`val is C_NA`), and NA is
# the name the NAType methods and Python callers refer to.
C_NA = NAType()   # C-visible
NA = C_NA         # Python-visible
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -180,13 +180,15 @@ cpdef assert_almost_equal(a, b, | |
# classes can't be the same, to raise error | ||
assert_class_equal(a, b, obj=obj) | ||
|
||
if a == b: | ||
# object comparison | ||
return True | ||
if isna(a) and isna(b): | ||
# TODO: Should require same-dtype NA? | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This might be a reasonable time to start enforcing same-NA. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Open a separate issue for it? |
||
# nan / None comparison | ||
return True | ||
|
||
if a == b: | ||
# object comparison | ||
return True | ||
|
||
if is_comparable_as_number(a) and is_comparable_as_number(b): | ||
if array_equivalent(a, b, strict_nan=True): | ||
# inf comparison | ||
|
Uh oh!
There was an error while loading. Please reload this page.