From 63c81608c80713a8c68727f033da49d16e5c47d4 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 17 Dec 2022 22:23:11 +0100 Subject: [PATCH 1/3] PERF: Calculate mask in interpolate only once --- pandas/core/generic.py | 2 +- pandas/core/missing.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a4d99cb0eca42..b5b5270292e08 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11643,7 +11643,7 @@ def _find_valid_index(self, *, how: str) -> Hashable | None: ------- idx_first_valid : type of index """ - idxpos = find_valid_index(self._values, how=how) + idxpos = find_valid_index(self._values, how=how, is_valid=isna(self._values)) if idxpos is None: return None return self.index[idxpos] diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 0d058ead9d22c..8d3075805220c 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -170,7 +170,7 @@ def clean_interp_method(method: str, index: Index, **kwargs) -> str: return method -def find_valid_index(values, *, how: str) -> int | None: +def find_valid_index(values, *, how: str, is_valid: np.ndarray) -> int | None: """ Retrieves the index of the first valid value. @@ -179,6 +179,8 @@ def find_valid_index(values, *, how: str) -> int | None: values : ndarray or ExtensionArray how : {'first', 'last'} Use this parameter to change between the first or last valid index. + is_valid : np.ndarray + Boolean mask that is True where ``values`` is valid (not NA), e.g. ``~isna(values)``. Returns ------- @@ -189,8 +191,6 @@ def find_valid_index(values, *, how: str) -> int | None: if len(values) == 0: # early stop return None - is_valid = ~isna(values) - if values.ndim == 2: is_valid = is_valid.any(axis=1) # reduce axis 1 @@ -400,12 +400,12 @@ def _interpolate_1d( # These are sets of index pointers to invalid values... i.e. {0, 1, etc... 
all_nans = set(np.flatnonzero(invalid)) - first_valid_index = find_valid_index(yvalues, how="first") + first_valid_index = find_valid_index(yvalues, how="first", is_valid=valid) if first_valid_index is None: # no nan found in start first_valid_index = 0 start_nans = set(range(first_valid_index)) - last_valid_index = find_valid_index(yvalues, how="last") + last_valid_index = find_valid_index(yvalues, how="last", is_valid=valid) if last_valid_index is None: # no nan found in end last_valid_index = len(yvalues) end_nans = set(range(1 + last_valid_index, len(valid))) @@ -738,12 +738,13 @@ def _interpolate_with_limit_area( """ invalid = isna(values) + is_valid = ~invalid if not invalid.all(): - first = find_valid_index(values, how="first") + first = find_valid_index(values, how="first", is_valid=is_valid) if first is None: first = 0 - last = find_valid_index(values, how="last") + last = find_valid_index(values, how="last", is_valid=is_valid) if last is None: last = len(values) From 5b304c2d90ca9b02b39ee60e3053bb4c8f8f96c4 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 17 Dec 2022 23:10:20 +0100 Subject: [PATCH 2/3] Fix test --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b5b5270292e08..9534eca51f383 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11643,7 +11643,7 @@ def _find_valid_index(self, *, how: str) -> Hashable | None: ------- idx_first_valid : type of index """ - idxpos = find_valid_index(self._values, how=how, is_valid=isna(self._values)) + idxpos = find_valid_index(self._values, how=how, is_valid=~isna(self._values)) if idxpos is None: return None return self.index[idxpos] From 8abcfb743b853063fe8d4f7d00c81a0f16b53924 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 18 Dec 2022 01:51:52 +0100 Subject: [PATCH 3/3] Fix mypy --- pandas/core/missing.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git 
a/pandas/core/missing.py b/pandas/core/missing.py index 8d3075805220c..0da4f6404c3cc 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -170,7 +170,9 @@ def clean_interp_method(method: str, index: Index, **kwargs) -> str: return method -def find_valid_index(values, *, how: str, is_valid: np.ndarray) -> int | None: +def find_valid_index( + values, *, how: str, is_valid: npt.NDArray[np.bool_] +) -> int | None: """ Retrieves the index of the first valid value. @@ -204,7 +206,9 @@ def find_valid_index(values, *, how: str, is_valid: np.ndarray) -> int | None: if not chk_notna: return None - return idxpos + # Incompatible return value type (got "signedinteger[Any]", + # expected "Optional[int]") + return idxpos # type: ignore[return-value] def interpolate_array_2d(