From 659895f290afe8ceafcecceba3c9bfbbe741a0fb Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 11 Jun 2020 13:47:28 +0100 Subject: [PATCH 01/10] interpolate_1d returns function --- pandas/core/internals/blocks.py | 26 ++--- pandas/core/missing.py | 199 ++++++++++++++------------------ 2 files changed, 98 insertions(+), 127 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e2a778f729470..c390d48ee23d9 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1194,22 +1194,16 @@ def _interpolate( ) # process 1-d slices in the axis direction - def func(yvalues: np.ndarray) -> np.ndarray: - - # process a 1-d slice, returning it - # should the axis argument be handled below in apply_along_axis? - # i.e. not an arg to missing.interpolate_1d - return missing.interpolate_1d( - xvalues=index, - yvalues=yvalues, - method=method, - limit=limit, - limit_direction=limit_direction, - limit_area=limit_area, - fill_value=fill_value, - bounds_error=False, - **kwargs, - ) + func = missing.interpolate_1d( + xvalues=index, + method=method, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + fill_value=fill_value, + bounds_error=False, + **kwargs, + ) # interp each column independently interp_values = np.apply_along_axis(func, axis, data) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index d8671616f944e..b0b256ca0a3ff 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -2,7 +2,7 @@ Routines for filling missing data. """ -from typing import Any, List, Optional, Set, Union +from typing import Any, Callable, List, Optional, Set, Union import numpy as np @@ -96,26 +96,23 @@ def clean_fill_method(method, allow_nearest=False): def clean_interp_method(method: str, **kwargs) -> str: order = kwargs.get("order") - valid = [ - "linear", - "time", - "index", - "values", + sp_methods = [ "nearest", "zero", "slinear", "quadratic", "cubic", "barycentric", - "polynomial", "krogh", + "spline", + "polynomial", + "from_derivatives", "piecewise_polynomial", "pchip", "akima", - "spline", - "from_derivatives", "cubicspline", ] + valid = ["linear", "time", "index", "values"] + sp_methods if method in ("spline", "polynomial") and order is None: raise ValueError("You must specify the order of the spline or polynomial.") if method not in valid: @@ -163,7 +160,6 @@ def find_valid_index(values, how: str): def interpolate_1d( xvalues: np.ndarray, - yvalues: np.ndarray, method: Optional[str] = "linear", limit: Optional[int] = None, limit_direction: str = "forward", @@ -172,7 +168,7 @@ def interpolate_1d( bounds_error: bool = False, order: Optional[int] = None, **kwargs, -): +) -> Callable[[np.ndarray], np.ndarray]: """ Logic for the 1-d interpolation. The result should be 1-d, inputs xvalues and yvalues will each be 1-d arrays of the same length. @@ -182,19 +178,6 @@ def interpolate_1d( """ # Treat the original, non-scipy methods first. - invalid = isna(yvalues) - valid = ~invalid - - if not valid.any(): - # have to call np.asarray(xvalues) since xvalues could be an Index - # which can't be mutated - result = np.empty_like(np.asarray(xvalues), dtype=np.float64) - result.fill(np.nan) - return result - - if valid.all(): - return yvalues - if method == "time": if not getattr(xvalues, "is_all_dates", None): # if not issubclass(xvalues.dtype.type, np.datetime64): @@ -225,98 +208,92 @@ def interpolate_1d( # default limit is unlimited GH #16282 limit = algos._validate_limit(nobs=None, limit=limit) - # These are sets of index pointers to invalid values... i.e. {0, 1, etc... - all_nans = set(np.flatnonzero(invalid)) - start_nans = set(range(find_valid_index(yvalues, "first"))) - end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid))) - mid_nans = all_nans - start_nans - end_nans - - # Like the sets above, preserve_nans contains indices of invalid values, - # but in this case, it is the final set of indices that need to be - # preserved as NaN after the interpolation. - - # For example if limit_direction='forward' then preserve_nans will - # contain indices of NaNs at the beginning of the series, and NaNs that - # are more than'limit' away from the prior non-NaN. - - # set preserve_nans based on direction using _interp_limit - preserve_nans: Union[List, Set] - if limit_direction == "forward": - preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) - elif limit_direction == "backward": - preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) - else: - # both directions... just use _interp_limit - preserve_nans = set(_interp_limit(invalid, limit, limit)) - - # if limit_area is set, add either mid or outside indices - # to preserve_nans GH #16284 - if limit_area == "inside": - # preserve NaNs on the outside - preserve_nans |= start_nans | end_nans - elif limit_area == "outside": - # preserve NaNs on the inside - preserve_nans |= mid_nans - - # sort preserve_nans and covert to list - preserve_nans = sorted(preserve_nans) - xvalues = getattr(xvalues, "values", xvalues) - yvalues = getattr(yvalues, "values", yvalues) - result = yvalues.copy() - - if method in ["linear", "time", "index", "values"]: - if method in ("values", "index"): - inds = np.asarray(xvalues) - # hack for DatetimeIndex, #1646 - if needs_i8_conversion(inds.dtype): - inds = inds.view(np.int64) - if inds.dtype == np.object_: - inds = lib.maybe_convert_objects(inds) + + inds = np.asarray(xvalues) + + # hack for DatetimeIndex, #1646 + if method != "linear" and needs_i8_conversion(inds.dtype): + inds = inds.view(np.int64) + + if method in ("values", "index"): + if inds.dtype == np.object_: + inds = lib.maybe_convert_objects(inds) + + def func(yvalues: np.ndarray) -> np.ndarray: + invalid = isna(yvalues) + valid = ~invalid + + if not valid.any(): + # have to call np.asarray(xvalues) since xvalues could be an Index + # which can't be mutated + result = np.empty_like(np.asarray(xvalues), dtype=np.float64) + result.fill(np.nan) + return result + + if valid.all(): + return yvalues + + # These are sets of index pointers to invalid values... i.e. {0, 1, etc... + all_nans = set(np.flatnonzero(invalid)) + start_nans = set(range(find_valid_index(yvalues, "first"))) + end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid))) + mid_nans = all_nans - start_nans - end_nans + + # Like the sets above, preserve_nans contains indices of invalid values, + # but in this case, it is the final set of indices that need to be + # preserved as NaN after the interpolation. + + # For example if limit_direction='forward' then preserve_nans will + # contain indices of NaNs at the beginning of the series, and NaNs that + # are more than'limit' away from the prior non-NaN. + + # set preserve_nans based on direction using _interp_limit + preserve_nans: Union[List, Set] + if limit_direction == "forward": + preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) + elif limit_direction == "backward": + preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) else: - inds = xvalues - # np.interp requires sorted X values, #21037 - indexer = np.argsort(inds[valid]) - result[invalid] = np.interp( - inds[invalid], inds[valid][indexer], yvalues[valid][indexer] - ) + # both directions... just use _interp_limit + preserve_nans = set(_interp_limit(invalid, limit, limit)) + + # if limit_area is set, add either mid or outside indices + # to preserve_nans GH #16284 + if limit_area == "inside": + # preserve NaNs on the outside + preserve_nans |= start_nans | end_nans + elif limit_area == "outside": + # preserve NaNs on the inside + preserve_nans |= mid_nans + + # sort preserve_nans and covert to list + preserve_nans = sorted(preserve_nans) + + yvalues = getattr(yvalues, "values", yvalues) + result = yvalues.copy() + + if method in ["linear", "index", "values"]: + # np.interp requires sorted X values, #21037 + indexer = np.argsort(inds[valid]) + result[invalid] = np.interp( + inds[invalid], inds[valid][indexer], yvalues[valid][indexer] + ) + else: + result[invalid] = _interpolate_scipy_wrapper( + inds[valid], + yvalues[valid], + inds[invalid], + method=method, + fill_value=fill_value, + bounds_error=bounds_error, + order=order, + **kwargs, + ) result[preserve_nans] = np.nan return result - sp_methods = [ - "nearest", - "zero", - "slinear", - "quadratic", - "cubic", - "barycentric", - "krogh", - "spline", - "polynomial", - "from_derivatives", - "piecewise_polynomial", - "pchip", - "akima", - "cubicspline", - ] - - if method in sp_methods: - inds = np.asarray(xvalues) - # hack for DatetimeIndex, #1646 - if issubclass(inds.dtype.type, np.datetime64): - inds = inds.view(np.int64) - result[invalid] = _interpolate_scipy_wrapper( - inds[valid], - yvalues[valid], - inds[invalid], - method=method, - fill_value=fill_value, - bounds_error=bounds_error, - order=order, - **kwargs, - ) - result[preserve_nans] = np.nan - return result + return func def _interpolate_scipy_wrapper( From 810767ad29dd21a420da720a3c67fe80a6ea9703 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 14 Jun 2020 10:14:22 +0100 Subject: [PATCH 02/10] fixup whitespace from merge --- pandas/core/missing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 9092b97343aa2..7ebab4aee0a6e 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -120,6 +120,7 @@ def clean_fill_method(method, allow_nearest=False): def clean_interp_method(method: str, **kwargs) -> str: order = kwargs.get("order") + if method in ("spline", "polynomial") and order is None: raise ValueError("You must specify the order of the spline or polynomial.") @@ -301,6 +302,7 @@ def func(yvalues: np.ndarray) -> np.ndarray: order=order, **kwargs, ) + result[preserve_nans] = np.nan return result From e676a1042d4098ff4101d090aaf04897821afd00 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 4 Jul 2020 20:24:32 +0100 Subject: [PATCH 03/10] use class instead --- pandas/core/internals/blocks.py | 4 +- pandas/core/missing.py | 163 +++++++++++++++++--------------- 2 files changed, 89 insertions(+), 78 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3a8b860d15ad7..728cd045b9d03 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1212,7 +1212,7 @@ def _interpolate( ) # process 1-d slices in the axis direction - func = missing.interpolate_1d( + func = missing.Interpolator1d( xvalues=index, method=method, limit=limit, @@ -1221,7 +1221,7 @@ def _interpolate( fill_value=fill_value, bounds_error=False, **kwargs, - ) + ).interpolate # interp each column independently interp_values = np.apply_along_axis(func, axis, data) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 7ebab4aee0a6e..1ba08294a8de2 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -2,7 +2,7 @@ Routines for filling missing data. """ -from typing import Any, Callable, List, Optional, Set, Union +from typing import Any, List, Optional, Set, Union import numpy as np @@ -168,17 +168,7 @@ def find_valid_index(values, how: str): return idxpos -def interpolate_1d( - xvalues: np.ndarray, - method: Optional[str] = "linear", - limit: Optional[int] = None, - limit_direction: str = "forward", - limit_area: Optional[str] = None, - fill_value: Optional[Any] = None, - bounds_error: bool = False, - order: Optional[int] = None, - **kwargs, -) -> Callable[[np.ndarray], np.ndarray]: +class Interpolator1d: """ Logic for the 1-d interpolation. The result should be 1-d, inputs xvalues and yvalues will each be 1-d arrays of the same length. @@ -186,60 +176,81 @@ def interpolate_1d( Bounds_error is currently hardcoded to False since non-scipy ones don't take it as an argument. """ - if method == "time": - if not getattr(xvalues, "is_all_dates", None): - # if not issubclass(xvalues.dtype.type, np.datetime64): - raise ValueError( - "time-weighted interpolation only works " - "on Series or DataFrames with a " - "DatetimeIndex" - ) - method = "values" - - valid_limit_directions = ["forward", "backward", "both"] - limit_direction = limit_direction.lower() - if limit_direction not in valid_limit_directions: - raise ValueError( - "Invalid limit_direction: expecting one of " - f"{valid_limit_directions}, got '{limit_direction}'." - ) - if limit_area is not None: - valid_limit_areas = ["inside", "outside"] - limit_area = limit_area.lower() - if limit_area not in valid_limit_areas: + def __init__( + self, + xvalues: np.ndarray, + method: Optional[str] = "linear", + limit: Optional[int] = None, + limit_direction: str = "forward", + limit_area: Optional[str] = None, + fill_value: Optional[Any] = None, + bounds_error: bool = False, + order: Optional[int] = None, + **kwargs, + ): + if method == "time": + if not getattr(xvalues, "is_all_dates", None): + # if not issubclass(xvalues.dtype.type, np.datetime64): + raise ValueError( + "time-weighted interpolation only works " + "on Series or DataFrames with a " + "DatetimeIndex" + ) + method = "values" + self.method = method + + valid_limit_directions = ["forward", "backward", "both"] + limit_direction = limit_direction.lower() + if limit_direction not in valid_limit_directions: raise ValueError( - f"Invalid limit_area: expecting one of {valid_limit_areas}, got " - f"{limit_area}." + "Invalid limit_direction: expecting one of " + f"{valid_limit_directions}, got '{limit_direction}'." ) - - # default limit is unlimited GH #16282 - limit = algos._validate_limit(nobs=None, limit=limit) - - # xvalues to pass to NumPy/SciPy - - xvalues = getattr(xvalues, "values", xvalues) - if method == "linear": - inds = xvalues - else: - inds = np.asarray(xvalues) - - # hack for DatetimeIndex, #1646 - if needs_i8_conversion(inds.dtype): - inds = inds.view(np.int64) - - if method in ("values", "index"): - if inds.dtype == np.object_: - inds = lib.maybe_convert_objects(inds) - - def func(yvalues: np.ndarray) -> np.ndarray: + self.limit_direction = limit_direction + + if limit_area is not None: + valid_limit_areas = ["inside", "outside"] + limit_area = limit_area.lower() + if limit_area not in valid_limit_areas: + raise ValueError( + f"Invalid limit_area: expecting one of {valid_limit_areas}, got " + f"{limit_area}." + ) + self.limit_area = limit_area + + # default limit is unlimited GH #16282 + self.limit = algos._validate_limit(nobs=None, limit=limit) + + # xvalues to pass to NumPy/SciPy + + xvalues = getattr(xvalues, "values", xvalues) + if method == "linear": + inds = xvalues + else: + inds = np.asarray(xvalues) + + # hack for DatetimeIndex, #1646 + if needs_i8_conversion(inds.dtype): + inds = inds.view(np.int64) + + if method in ("values", "index"): + if inds.dtype == np.object_: + inds = lib.maybe_convert_objects(inds) + self.xvalues = inds + self.fill_value = fill_value + self.bounds_error = bounds_error + self.order = order + self.kwargs = kwargs + + def interpolate(self, yvalues: np.ndarray) -> np.ndarray: invalid = isna(yvalues) valid = ~invalid if not valid.any(): # have to call np.asarray(xvalues) since xvalues could be an Index # which can't be mutated - result = np.empty_like(np.asarray(xvalues), dtype=np.float64) + result = np.empty_like(np.asarray(self.xvalues), dtype=np.float64) result.fill(np.nan) return result @@ -262,20 +273,20 @@ def func(yvalues: np.ndarray) -> np.ndarray: # set preserve_nans based on direction using _interp_limit preserve_nans: Union[List, Set] - if limit_direction == "forward": - preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) - elif limit_direction == "backward": - preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) + if self.limit_direction == "forward": + preserve_nans = start_nans | set(_interp_limit(invalid, self.limit, 0)) + elif self.limit_direction == "backward": + preserve_nans = end_nans | set(_interp_limit(invalid, 0, self.limit)) else: # both directions... just use _interp_limit - preserve_nans = set(_interp_limit(invalid, limit, limit)) + preserve_nans = set(_interp_limit(invalid, self.limit, self.limit)) # if limit_area is set, add either mid or outside indices # to preserve_nans GH #16284 - if limit_area == "inside": + if self.limit_area == "inside": # preserve NaNs on the outside preserve_nans |= start_nans | end_nans - elif limit_area == "outside": + elif self.limit_area == "outside": # preserve NaNs on the inside preserve_nans |= mid_nans @@ -285,29 +296,29 @@ def func(yvalues: np.ndarray) -> np.ndarray: yvalues = getattr(yvalues, "values", yvalues) result = yvalues.copy() - if method in NP_METHODS: + if self.method in NP_METHODS: # np.interp requires sorted X values, #21037 - indexer = np.argsort(inds[valid]) + indexer = np.argsort(self.xvalues[valid]) result[invalid] = np.interp( - inds[invalid], inds[valid][indexer], yvalues[valid][indexer] + self.xvalues[invalid], + self.xvalues[valid][indexer], + yvalues[valid][indexer], ) else: result[invalid] = _interpolate_scipy_wrapper( - inds[valid], + self.xvalues[valid], yvalues[valid], - inds[invalid], - method=method, - fill_value=fill_value, - bounds_error=bounds_error, - order=order, - **kwargs, + self.xvalues[invalid], + method=self.method, + fill_value=self.fill_value, + bounds_error=self.bounds_error, + order=self.order, + **self.kwargs, ) result[preserve_nans] = np.nan return result - return func - def _interpolate_scipy_wrapper( x, y, new_x, method, fill_value=None, bounds_error=False, order=None, **kwargs From f7c70a0a1bb80508c1e8267e691baf3c3e44b527 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 5 Jul 2020 13:03:06 +0100 Subject: [PATCH 04/10] preserve_nans logic to seperate method for profiling --- pandas/core/missing.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 1ba08294a8de2..5e344139a27d6 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -243,20 +243,7 @@ def __init__( self.order = order self.kwargs = kwargs - def interpolate(self, yvalues: np.ndarray) -> np.ndarray: - invalid = isna(yvalues) - valid = ~invalid - - if not valid.any(): - # have to call np.asarray(xvalues) since xvalues could be an Index - # which can't be mutated - result = np.empty_like(np.asarray(self.xvalues), dtype=np.float64) - result.fill(np.nan) - return result - - if valid.all(): - return yvalues - + def _update_invalid_to_preserve_nans(self, yvalues, valid, invalid) -> None: # These are sets of index pointers to invalid values... i.e. {0, 1, etc... all_nans = set(np.flatnonzero(invalid)) start_nans = set(range(find_valid_index(yvalues, "first"))) @@ -292,10 +279,27 @@ def interpolate(self, yvalues: np.ndarray) -> np.ndarray: # sort preserve_nans and covert to list preserve_nans = sorted(preserve_nans) + invalid[preserve_nans] = False + + def interpolate(self, yvalues: np.ndarray) -> np.ndarray: + invalid = isna(yvalues) + valid = ~invalid + + if not valid.any(): + # have to call np.asarray(xvalues) since xvalues could be an Index + # which can't be mutated + result = np.empty_like(np.asarray(self.xvalues), dtype=np.float64) + result.fill(np.nan) + return result + + if valid.all(): + return yvalues yvalues = getattr(yvalues, "values", yvalues) result = yvalues.copy() + self._update_invalid_to_preserve_nans(yvalues, valid, invalid) + if self.method in NP_METHODS: # np.interp requires sorted X values, #21037 indexer = np.argsort(self.xvalues[valid]) @@ -316,7 +320,6 @@ def interpolate(self, yvalues: np.ndarray) -> np.ndarray: **self.kwargs, ) - result[preserve_nans] = np.nan return result From cb1228e38655323e0a49c94b5bf7516d89585b2a Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 5 Jul 2020 13:47:25 +0100 Subject: [PATCH 05/10] add validators and convertors --- pandas/core/missing.py | 65 ++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 27 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 5e344139a27d6..b2ecb1efd2837 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -189,6 +189,39 @@ def __init__( order: Optional[int] = None, **kwargs, ): + self.method = self._validate_method(method, xvalues) + self.xvalues = self._convert_xvalues(xvalues, self.method) + + # default limit is unlimited GH #16282 + self.limit = algos._validate_limit(nobs=None, limit=limit) + self.limit_direction = self._validate_limit_direction(limit_direction) + self.limit_area = self._validate_limit_area(limit_area) + + self.fill_value = fill_value + self.bounds_error = bounds_error + self.order = order + self.kwargs = kwargs + + def _convert_xvalues(self, xvalues, method): + """ + Convert xvalues to pass to NumPy/SciPy. + """ + xvalues = getattr(xvalues, "values", xvalues) + if method == "linear": + inds = xvalues + else: + inds = np.asarray(xvalues) + + # hack for DatetimeIndex, #1646 + if needs_i8_conversion(inds.dtype): + inds = inds.view(np.int64) + + if method in ("values", "index"): + if inds.dtype == np.object_: + inds = lib.maybe_convert_objects(inds) + return inds + + def _validate_method(self, method, xvalues): if method == "time": if not getattr(xvalues, "is_all_dates", None): # if not issubclass(xvalues.dtype.type, np.datetime64): @@ -198,8 +231,9 @@ def __init__( "DatetimeIndex" ) method = "values" - self.method = method + return method + def _validate_limit_direction(self, limit_direction): valid_limit_directions = ["forward", "backward", "both"] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: @@ -207,8 +241,9 @@ def __init__( "Invalid limit_direction: expecting one of " f"{valid_limit_directions}, got '{limit_direction}'." ) - self.limit_direction = limit_direction + return limit_direction + def _validate_limit_area(self, limit_area): if limit_area is not None: valid_limit_areas = ["inside", "outside"] limit_area = limit_area.lower() @@ -217,31 +252,7 @@ def __init__( f"Invalid limit_area: expecting one of {valid_limit_areas}, got " f"{limit_area}." ) - self.limit_area = limit_area - - # default limit is unlimited GH #16282 - self.limit = algos._validate_limit(nobs=None, limit=limit) - - # xvalues to pass to NumPy/SciPy - - xvalues = getattr(xvalues, "values", xvalues) - if method == "linear": - inds = xvalues - else: - inds = np.asarray(xvalues) - - # hack for DatetimeIndex, #1646 - if needs_i8_conversion(inds.dtype): - inds = inds.view(np.int64) - - if method in ("values", "index"): - if inds.dtype == np.object_: - inds = lib.maybe_convert_objects(inds) - self.xvalues = inds - self.fill_value = fill_value - self.bounds_error = bounds_error - self.order = order - self.kwargs = kwargs + return limit_area def _update_invalid_to_preserve_nans(self, yvalues, valid, invalid) -> None: # These are sets of index pointers to invalid values... i.e. {0, 1, etc... From fb43c8e2ad9fe38686c4e39b7243a88dce23e359 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 5 Jul 2020 15:37:09 +0100 Subject: [PATCH 06/10] move dispatch logic outside interpolate --- pandas/core/missing.py | 52 +++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index b2ecb1efd2837..ac5e2c482d962 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -202,6 +202,32 @@ def __init__( self.order = order self.kwargs = kwargs + def _np_func(yvalues, valid, invalid): + # np.interp requires sorted X values, #21037 + indexer = np.argsort(self.xvalues[valid]) + return np.interp( + self.xvalues[invalid], + self.xvalues[valid][indexer], + yvalues[valid][indexer], + ) + + def _sp_func(yvalues, valid, invalid): + return _interpolate_scipy_wrapper( + self.xvalues[valid], + yvalues[valid], + self.xvalues[invalid], + method=self.method, + fill_value=self.fill_value, + bounds_error=self.bounds_error, + order=self.order, + **self.kwargs, + ) + + if self.method in NP_METHODS: + self.func = _np_func + else: + self.func = _sp_func + def _convert_xvalues(self, xvalues, method): """ Convert xvalues to pass to NumPy/SciPy. @@ -224,7 +250,6 @@ def _convert_xvalues(self, xvalues, method): def _validate_method(self, method, xvalues): if method == "time": if not getattr(xvalues, "is_all_dates", None): - # if not issubclass(xvalues.dtype.type, np.datetime64): raise ValueError( "time-weighted interpolation only works " "on Series or DataFrames with a " @@ -297,9 +322,7 @@ def interpolate(self, yvalues: np.ndarray) -> np.ndarray: valid = ~invalid if not valid.any(): - # have to call np.asarray(xvalues) since xvalues could be an Index - # which can't be mutated - result = np.empty_like(np.asarray(self.xvalues), dtype=np.float64) + result = np.empty_like(self.xvalues, dtype=np.float64) result.fill(np.nan) return result @@ -311,26 +334,7 @@ def interpolate(self, yvalues: np.ndarray) -> np.ndarray: self._update_invalid_to_preserve_nans(yvalues, valid, invalid) - if self.method in NP_METHODS: - # np.interp requires sorted X values, #21037 - indexer = np.argsort(self.xvalues[valid]) - result[invalid] = np.interp( - self.xvalues[invalid], - self.xvalues[valid][indexer], - yvalues[valid][indexer], - ) - else: - result[invalid] = _interpolate_scipy_wrapper( - self.xvalues[valid], - yvalues[valid], - self.xvalues[invalid], - method=self.method, - fill_value=self.fill_value, - bounds_error=self.bounds_error, - order=self.order, - **self.kwargs, - ) - + result[invalid] = self.func(yvalues, valid, invalid) return result From 3482e238460fe6ca98674c491b04f888e0fedc7e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 5 Jul 2020 15:50:10 +0100 Subject: [PATCH 07/10] remove unneeded class attributes --- pandas/core/missing.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index ac5e2c482d962..97feffcbeb4e4 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -189,19 +189,15 @@ def __init__( order: Optional[int] = None, **kwargs, ): - self.method = self._validate_method(method, xvalues) - self.xvalues = self._convert_xvalues(xvalues, self.method) + method = self._validate_method(method, xvalues) + + self.xvalues = self._convert_xvalues(xvalues, method) # default limit is unlimited GH #16282 self.limit = algos._validate_limit(nobs=None, limit=limit) self.limit_direction = self._validate_limit_direction(limit_direction) self.limit_area = self._validate_limit_area(limit_area) - self.fill_value = fill_value - self.bounds_error = bounds_error - self.order = order - self.kwargs = kwargs - def _np_func(yvalues, valid, invalid): # np.interp requires sorted X values, #21037 indexer = np.argsort(self.xvalues[valid]) @@ -216,14 +212,14 @@ def _sp_func(yvalues, valid, invalid): self.xvalues[valid], yvalues[valid], self.xvalues[invalid], - method=self.method, - fill_value=self.fill_value, - bounds_error=self.bounds_error, - order=self.order, - **self.kwargs, + method=method, + fill_value=fill_value, + bounds_error=bounds_error, + order=order, + **kwargs, ) - if self.method in NP_METHODS: + if method in NP_METHODS: self.func = _np_func else: self.func = _sp_func From 871902cdf846edd54746510b31877a3a9a646f23 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 5 Jul 2020 16:11:58 +0100 Subject: [PATCH 08/10] remove xvalues from class atrributes --- pandas/core/missing.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 97feffcbeb4e4..1ce760e2eeaa0 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -190,8 +190,7 @@ def __init__( **kwargs, ): method = self._validate_method(method, xvalues) - - self.xvalues = self._convert_xvalues(xvalues, method) + xvalues = self._convert_xvalues(xvalues, method) # default limit is unlimited GH #16282 self.limit = algos._validate_limit(nobs=None, limit=limit) @@ -200,18 +199,18 @@ def __init__( def _np_func(yvalues, valid, invalid): # np.interp requires sorted X values, #21037 - indexer = np.argsort(self.xvalues[valid]) + indexer = np.argsort(xvalues[valid]) return np.interp( - self.xvalues[invalid], - self.xvalues[valid][indexer], + xvalues[invalid], + xvalues[valid][indexer], yvalues[valid][indexer], ) def _sp_func(yvalues, valid, invalid): return _interpolate_scipy_wrapper( - self.xvalues[valid], + xvalues[valid], yvalues[valid], - self.xvalues[invalid], + xvalues[invalid], method=method, fill_value=fill_value, bounds_error=bounds_error, @@ -220,9 +219,9 @@ def _sp_func(yvalues, valid, invalid): ) if method in NP_METHODS: - self.func = _np_func + self.interpolator = _np_func else: - self.func = _sp_func + self.interpolator = _sp_func def _convert_xvalues(self, xvalues, method): """ @@ -317,12 +316,7 @@ def interpolate(self, yvalues: np.ndarray) -> np.ndarray: invalid = isna(yvalues) valid = ~invalid - if not valid.any(): - result = np.empty_like(self.xvalues, dtype=np.float64) - result.fill(np.nan) - return result - - if valid.all(): + if not valid.any() or valid.all(): return yvalues yvalues = getattr(yvalues, "values", yvalues) @@ -330,7 +324,7 @@ def interpolate(self, yvalues: np.ndarray) -> np.ndarray: self._update_invalid_to_preserve_nans(yvalues, valid, invalid) - result[invalid] = self.func(yvalues, valid, invalid) + result[invalid] = self.interpolator(yvalues, valid, invalid) return result From 8a46508390cf1ddff5a2c84dcbebf0704879373f Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 5 Jul 2020 16:22:03 +0100 Subject: [PATCH 09/10] create NumPyInterpolator class --- pandas/core/missing.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 1ce760e2eeaa0..599c87a5723f2 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -197,15 +197,6 @@ def __init__( self.limit_direction = self._validate_limit_direction(limit_direction) self.limit_area = self._validate_limit_area(limit_area) - def _np_func(yvalues, valid, invalid): - # np.interp requires sorted X values, #21037 - indexer = np.argsort(xvalues[valid]) - return np.interp( - xvalues[invalid], - xvalues[valid][indexer], - yvalues[valid][indexer], - ) - def _sp_func(yvalues, valid, invalid): return _interpolate_scipy_wrapper( xvalues[valid], @@ -219,7 +210,7 @@ def _sp_func(yvalues, valid, invalid): ) if method in NP_METHODS: - self.interpolator = _np_func + self.interpolator = NumPyInterpolator(xvalues).interpolate else: self.interpolator = _sp_func @@ -328,6 +319,20 @@ def interpolate(self, yvalues: np.ndarray) -> np.ndarray: return result +class NumPyInterpolator: + # np.interp requires sorted X values, #21037 + def __init__(self, xvalues: np.ndarray): + self.xvalues = xvalues + + def interpolate(self, yvalues, valid, invalid): + indexer = np.argsort(self.xvalues[valid]) + return np.interp( + self.xvalues[invalid], + self.xvalues[valid][indexer], + yvalues[valid][indexer], + ) + + def _interpolate_scipy_wrapper( x, y, new_x, method, fill_value=None, bounds_error=False, order=None, **kwargs ): From 54b762a435e8e5c79ee1160677ff390a4a412c1e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 5 Jul 2020 16:51:15 +0100 Subject: [PATCH 10/10] move argsort from interpolate to init --- pandas/core/missing.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 599c87a5723f2..16fad3ef9a5cd 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -323,14 +323,15 @@ class NumPyInterpolator: # np.interp requires sorted X values, #21037 def __init__(self, xvalues: np.ndarray): self.xvalues = xvalues + self.indexer = np.argsort(xvalues) + self.xvalues_sorted = xvalues[self.indexer] def interpolate(self, yvalues, valid, invalid): - indexer = np.argsort(self.xvalues[valid]) - return np.interp( - self.xvalues[invalid], - self.xvalues[valid][indexer], - yvalues[valid][indexer], - ) + valid_sorted = valid[self.indexer] + x = self.xvalues[invalid] + xp = self.xvalues_sorted[valid_sorted] + yp = yvalues[self.indexer][valid_sorted] + return np.interp(x, xp, yp) def _interpolate_scipy_wrapper(