From 0b9a524bce45f8f5facaa566b6eb715ede4d78af Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 19 Sep 2020 20:03:32 -0700 Subject: [PATCH 1/2] REF: share fillna --- pandas/core/arrays/_mixins.py | 33 +++++++++++++++++++++++ pandas/core/arrays/datetimelike.py | 42 +----------------------------- pandas/core/arrays/numpy_.py | 34 +----------------------- 3 files changed, 35 insertions(+), 74 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index a947ab64f7380..808d598558c83 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -6,7 +6,11 @@ from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly, doc +from pandas.util._validators import validate_fillna_kwargs +from pandas.core.dtypes.inference import is_array_like + +from pandas.core import missing from pandas.core.algorithms import take, unique from pandas.core.array_algos.transforms import shift from pandas.core.arrays.base import ExtensionArray @@ -194,3 +198,32 @@ def __getitem__(self, key): def _validate_getitem_key(self, key): return check_array_indexer(self, key) + + @doc(ExtensionArray.fillna) + def fillna(self: _T, value=None, method=None, limit=None) -> _T: + value, method = validate_fillna_kwargs(value, method) + + mask = self.isna() + + # TODO: share this with EA base class implementation + if is_array_like(value): + if len(value) != len(self): + raise ValueError( + f"Length of 'value' does not match. Got ({len(value)}) " + f" expected {len(self)}" + ) + value = value[mask] + + if mask.any(): + if method is not None: + func = missing.get_fill_func(method) + new_values = func(self._ndarray.copy(), limit=limit, mask=mask) + # TODO: PandasArray didnt used to copy, need tests for this + new_values = self._from_backing_data(new_values) + else: + # fill with value + new_values = self.copy() + new_values[mask] = value + else: + new_values = self.copy() + return new_values diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 45cabe8f0b498..7051507f9a90e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -28,7 +28,6 @@ from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError, NullFrequencyError, PerformanceWarning from pandas.util._decorators import Appender, Substitution, cache_readonly -from pandas.util._validators import validate_fillna_kwargs from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -48,11 +47,9 @@ is_unsigned_integer_dtype, pandas_dtype, ) -from pandas.core.dtypes.generic import ABCSeries -from pandas.core.dtypes.inference import is_array_like from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna -from pandas.core import missing, nanops, ops +from pandas.core import nanops, ops from pandas.core.algorithms import checked_add_with_arr, unique1d, value_counts from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.arrays.base import ExtensionOpsMixin @@ -979,43 +976,6 @@ def _maybe_mask_results(self, result, fill_value=iNaT, convert=None): result[self._isnan] = fill_value return result - def fillna(self, value=None, method=None, limit=None): - # TODO(GH-20300): remove this - # Just overriding to ensure that we avoid an astype(object). - # Either 20300 or a `_values_for_fillna` would avoid this duplication. - if isinstance(value, ABCSeries): - value = value.array - - value, method = validate_fillna_kwargs(value, method) - - mask = self.isna() - - if is_array_like(value): - if len(value) != len(self): - raise ValueError( - f"Length of 'value' does not match. Got ({len(value)}) " - f" expected {len(self)}" - ) - value = value[mask] - - if mask.any(): - if method is not None: - if method == "pad": - func = missing.pad_1d - else: - func = missing.backfill_1d - - values = self.copy() - new_values = func(values, limit=limit, mask=mask) - new_values = self._from_backing_data(new_values) - else: - # fill with value - new_values = self.copy() - new_values[mask] = value - else: - new_values = self.copy() - return new_values - # ------------------------------------------------------------------ # Frequency Properties/Methods diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index afcae2c5c8b43..61076132b24cd 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -1,5 +1,5 @@ import numbers -from typing import Optional, Tuple, Type, Union +from typing import Tuple, Type, Union import numpy as np from numpy.lib.mixins import NDArrayOperatorsMixin @@ -7,10 +7,8 @@ from pandas._libs import lib from pandas._typing import Scalar from pandas.compat.numpy import function as nv -from pandas.util._validators import validate_fillna_kwargs from pandas.core.dtypes.dtypes import ExtensionDtype -from pandas.core.dtypes.inference import is_array_like from pandas.core.dtypes.missing import isna from pandas import compat @@ -19,7 +17,6 @@ from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.arrays.base import ExtensionOpsMixin from pandas.core.construction import extract_array -from pandas.core.missing import backfill_1d, pad_1d class PandasDtype(ExtensionDtype): @@ -263,35 +260,6 @@ def _validate_setitem_value(self, value): def isna(self) -> np.ndarray: return isna(self._ndarray) - def fillna( - self, value=None, method: Optional[str] = None, limit: Optional[int] = None - ) -> "PandasArray": - # TODO(_values_for_fillna): remove this - value, method = validate_fillna_kwargs(value, method) - - mask = self.isna() - - if is_array_like(value): - if len(value) != len(self): - raise ValueError( - f"Length of 'value' does not match. Got ({len(value)}) " - f" expected {len(self)}" - ) - value = value[mask] - - if mask.any(): - if method is not None: - func = pad_1d if method == "pad" else backfill_1d - new_values = func(self._ndarray, limit=limit, mask=mask) - new_values = self._from_sequence(new_values, dtype=self.dtype) - else: - # fill with value - new_values = self.copy() - new_values[mask] = value - else: - new_values = self.copy() - return new_values - def _validate_fill_value(self, fill_value): if fill_value is None: # Primarily for subclasses From c1673ff6bc34240c174b2ec9555ec09d122dcabd Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 20 Sep 2020 16:14:42 -0700 Subject: [PATCH 2/2] Fix casting period to float --- pandas/core/missing.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 9b96c8f01153b..edcdf2f54bc4c 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -587,7 +587,7 @@ def interpolate_2d( return values -def _cast_values_for_fillna(values, dtype: DtypeObj): +def _cast_values_for_fillna(values, dtype: DtypeObj, has_mask: bool): """ Cast values to a dtype that algos.pad and algos.backfill can handle. """ @@ -597,8 +597,10 @@ def _cast_values_for_fillna(values, dtype: DtypeObj): if needs_i8_conversion(dtype): values = values.view(np.int64) - elif is_integer_dtype(values): + elif is_integer_dtype(values) and not has_mask: # NB: this check needs to come after the datetime64 check above + # has_mask check to avoid casting i8 values that have already + # been cast from PeriodDtype values = ensure_float64(values) return values @@ -609,11 +611,12 @@ def _fillna_prep(values, mask=None, dtype: Optional[DtypeObj] = None): if dtype is None: dtype = values.dtype - if mask is None: + has_mask = mask is not None + if not has_mask: # This needs to occur before datetime/timedeltas are cast to int64 mask = isna(values) - values = _cast_values_for_fillna(values, dtype) + values = _cast_values_for_fillna(values, dtype, has_mask) mask = mask.view(np.uint8) return values, mask