Open
Description
Right now, I don't believe there's a way for an ExtensionDtype to declare a custom scalar NA value and have pd.isna(scalar)
do the right thing. For example:
# Private sentinel: the only value NaSType.__new__ accepts, so other code
# cannot build NaS-like objects from arbitrary input.
_nas = object()


class NaSType(str):
    """
    NA for String type.

    A ``str`` subclass whose single instance, ``NaS``, marks a missing
    string. Like NaN, it never compares equal to anything, itself included.
    """

    def __new__(cls, value):
        """
        Return the singleton instance of this class.

        Parameters
        ----------
        value : object
            Must be the module-private ``_nas`` sentinel.

        Raises
        ------
        ValueError
            If ``value`` is anything other than the sentinel.
        """
        if value is not _nas:
            raise ValueError("Cannot create NaS from '{}'".format(value))
        # Look in the class's own __dict__ so a subclass gets its own
        # instance instead of inheriting the parent's.
        instance = cls.__dict__.get("_instance")
        if instance is None:
            instance = super().__new__(cls, value)
            cls._instance = instance
        return instance

    def __eq__(self, other):
        # TODO: array comparisons, etc.
        return False

    def __ne__(self, other):
        # Without this override ``str.__ne__`` is inherited and reports
        # ``NaS != NaS`` as False, contradicting ``__eq__``.
        return True

    def __hash__(self):
        # Overriding ``__eq__`` resets ``__hash__`` to None; restore an
        # identity-based hash so NaS can live in sets and dict keys.
        return object.__hash__(self)

    def __str__(self):
        return "NaS"

    def __repr__(self):
        return str(self)

    def __reduce__(self):
        # Returning a string tells copy/pickle to resolve the module-level
        # name ``NaS`` instead of re-calling ``__new__`` with the string
        # payload, which ``__new__`` would reject with ValueError.
        return "NaS"


NaS = NaSType(_nas)
@register_extension_dtype
class StringDtype(ExtensionDtype):
    """Extension dtype for string data that reports ``NaS`` as its NA scalar."""

    @property
    def na_value(self):
        """Scalar used to mark a missing value for this dtype."""
        return NaS

    @property
    def type(self) -> Type:
        """Scalar type of the elements, ``str``."""
        return str

    @property
    def name(self) -> str:
        """String identifier of the dtype, ``"string"``."""
        return "string"

    @classmethod
    def construct_from_string(cls, string: str):
        """
        Build the dtype from its string alias.

        ``"string"`` and ``"str"`` yield a new instance; any other input is
        handed to the parent class's ``construct_from_string``.
        """
        # Guard clause: defer everything that is not one of our aliases.
        if string not in {"string", "str"}:
            return super().construct_from_string(string)
        return cls()

    @classmethod
    def construct_array_type(cls) -> "Type[StringArray]":
        """Return the array class associated with this dtype."""
        return StringArray
In [18]: NaS
Out[18]: NaS
In [19]: pd.isna(NaS)
Out[19]: False
That should be True. In https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/missing.py#L131-L132 we go straight to libmissing.checknull(obj)
for scalar values.