From d9132d7bafeaead16e5941d8080115dcebabe20e Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Fri, 9 Jul 2021 16:44:26 -0400 Subject: [PATCH 01/11] Push for testing asv fix --- asv_bench/benchmarks/algos/isin.py | 4 ---- pandas/_libs/lib.pyi | 1 + pandas/_libs/lib.pyx | 16 ++++++++++++++++ pandas/core/arrays/masked.py | 12 +++++++----- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py index 159584d9120a5..219af7a08bd02 100644 --- a/asv_bench/benchmarks/algos/isin.py +++ b/asv_bench/benchmarks/algos/isin.py @@ -283,10 +283,6 @@ class IsInLongSeriesLookUpDominates: def setup(self, dtype, MaxNumber, series_type): N = 10 ** 7 - # https://github.com/pandas-dev/pandas/issues/39844 - if not np_version_under1p20 and dtype in ("Int64", "Float64"): - raise NotImplementedError - if series_type == "random_hits": array = np.random.randint(0, MaxNumber, N) if series_type == "random_misses": diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 5be50f16af003..59c74b72fa196 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -221,6 +221,7 @@ def array_equivalent_object( right: np.ndarray, # object[:] ) -> bool: ... def has_infs(arr: np.ndarray) -> bool: ... # const floating[:] +def has_NA(arr: np.ndarray) -> bool: ... def get_reverse_indexer( indexer: np.ndarray, # const intp_t[:] length: int, diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 4ab2497be94d5..f9941032e6296 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -538,6 +538,22 @@ def has_infs(floating[:] arr) -> bool: return ret +@cython.wraparound(False) +@cython.boundscheck(False) +def has_NA(object[:] arr) -> bool: + """ + Return True if NA present in arr, False otherwise + """ + cdef: + Py_ssize_t i + + for i in range(len(arr)): + if arr[i] is C_NA: + return True + + return False + + def maybe_indices_to_slice(ndarray[intp_t] indices, int max_len): cdef: Py_ssize_t i, n = len(indices) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index c4b9fab28c27e..c732eae977bfa 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -399,16 +399,18 @@ def take( # error: Return type "BooleanArray" of "isin" incompatible with return type # "ndarray" in supertype "ExtensionArray" - def isin(self, values) -> BooleanArray: # type: ignore[override] + def isin(self, values: np.ndarray) -> BooleanArray: # type: ignore[override] from pandas.core.arrays import BooleanArray result = isin(self._data, values) if self._hasna: - if libmissing.NA in values: - result += self._mask - else: - result *= np.invert(self._mask) + values_have_NA = libmissing.has_NA(values) + + # For now, NA does not propagate so set result to False, see + # https://github.com/pandas-dev/pandas/pull/38379 for some discussion + result[self._mask] = values_have_NA + mask = np.zeros_like(self, dtype=bool) return BooleanArray(result, mask, copy=False) From 1902d7aef87642e1ee2e81b1d91156992e5f01a5 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Fri, 9 Jul 2021 17:29:21 -0400 Subject: [PATCH 02/11] Add tests and whatsnew --- asv_bench/benchmarks/algos/isin.py | 9 --------- doc/source/whatsnew/v1.3.1.rst | 2 +- pandas/_libs/lib.pyx | 4 +++- pandas/core/arrays/masked.py | 10 +++++++--- pandas/tests/series/methods/test_isin.py | 21 +++++++++++++++++++++ 5 files changed, 32 insertions(+), 14 deletions(-) diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py index 219af7a08bd02..c6db26c0c57df 100644 --- a/asv_bench/benchmarks/algos/isin.py +++ b/asv_bench/benchmarks/algos/isin.py @@ -1,10 +1,5 @@ import numpy as np -try: - from pandas.compat import np_version_under1p20 -except ImportError: - from pandas.compat.numpy import _np_version_under1p20 as np_version_under1p20 - from pandas import ( Categorical, NaT, @@ -309,10 +304,6 @@ class IsInLongSeriesValuesDominate: def setup(self, dtype, series_type): N = 10 ** 7 - # https://github.com/pandas-dev/pandas/issues/39844 - if not np_version_under1p20 and dtype in ("Int64", "Float64"): - raise NotImplementedError - if series_type == "random": vals = np.random.randint(0, 10 * N, N) if series_type == "monotone": diff --git a/doc/source/whatsnew/v1.3.1.rst b/doc/source/whatsnew/v1.3.1.rst index 255747c3c5c6d..a766b667e89af 100644 --- a/doc/source/whatsnew/v1.3.1.rst +++ b/doc/source/whatsnew/v1.3.1.rst @@ -18,7 +18,7 @@ Fixed regressions - :class:`DataFrame` constructed with with an older version of pandas could not be unpickled (:issue:`42345`) - Performance regression in constructing a :class:`DataFrame` from a dictionary of dictionaries (:issue:`42338`) - Fixed regression in :meth:`DataFrame.agg` dropping values when the DataFrame had an Extension Array dtype, a duplicate index, and ``axis=1`` (:issue:`42380`) -- +- Fixed regression in :meth:`DataFrame.isin` and :meth:`Series.isin` raising ``TypeError`` with nullable data containing at least one missing value (:issue:`42405`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f9941032e6296..0e59b9efcd3ec 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -540,13 +540,15 @@ def has_infs(floating[:] arr) -> bool: @cython.wraparound(False) @cython.boundscheck(False) -def has_NA(object[:] arr) -> bool: +def has_NA(ndarray arr) -> bool: """ Return True if NA present in arr, False otherwise """ cdef: Py_ssize_t i + assert arr.ndim == 1, "'arr' must be 1-D." + for i in range(len(arr)): if arr[i] is C_NA: return True diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index c732eae977bfa..ef65a94010dda 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -399,13 +399,17 @@ def take( # error: Return type "BooleanArray" of "isin" incompatible with return type # "ndarray" in supertype "ExtensionArray" - def isin(self, values: np.ndarray) -> BooleanArray: # type: ignore[override] + def isin(self, values) -> BooleanArray: # type: ignore[override] from pandas.core.arrays import BooleanArray - result = isin(self._data, values) + # algorithms.isin will eventually convert values to an ndarray, so no extra + # cost to doing it here first + values_arr = np.asarray(values) + result = isin(self._data, values_arr) + if self._hasna: - values_have_NA = libmissing.has_NA(values) + values_have_NA = lib.has_NA(values_arr) # For now, NA does not propagate so set result to False, see # https://github.com/pandas-dev/pandas/pull/38379 for some discussion diff --git a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py index 898a769dfac48..dab9cc313b42f 100644 --- a/pandas/tests/series/methods/test_isin.py +++ b/pandas/tests/series/methods/test_isin.py @@ -156,6 +156,27 @@ def test_isin_float_in_int_series(self, values): expected = Series([True, False]) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("dtype", ["boolean", "Int64", "Float64"]) + @pytest.mark.parametrize( + "data,values,expected", + [ + ([0, 1, 0], [1], [False, True, False]), + ([0, 1, 0], [1, pd.NA], [False, True, False]), + ([0, pd.NA, 0], [1, 0], [True, False, True]), + ([0, 1, pd.NA], [1, pd.NA], [False, True, True]), + ([0, 1, pd.NA], [1, np.nan], [False, True, False]), + ([0, pd.NA, pd.NA], [np.nan, pd.NaT, None], [False, False, False]), + ], + ) + def test_isin_masked_types(self, dtype, data, values, expected): + # GH#42405 + ser = Series(data, dtype=dtype) + + result = ser.isin(values) + + expected = Series(expected, dtype="boolean") + tm.assert_series_equal(result, expected) + @pytest.mark.slow def test_isin_large_series_mixed_dtypes_and_nan(): From d2b988dee45ddfac6b6f75a7d1b2942ac6cabf87 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Fri, 9 Jul 2021 17:53:45 -0400 Subject: [PATCH 03/11] Comment fixups --- pandas/core/arrays/masked.py | 4 ++-- pandas/tests/series/methods/test_isin.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index ef65a94010dda..e75fd79778260 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -411,8 +411,8 @@ def isin(self, values) -> BooleanArray: # type: ignore[override] if self._hasna: values_have_NA = lib.has_NA(values_arr) - # For now, NA does not propagate so set result to False, see - # https://github.com/pandas-dev/pandas/pull/38379 for some discussion + # For now, NA does not propagate so set result according to presence of NA, + # see https://github.com/pandas-dev/pandas/pull/38379 for some discussion result[self._mask] = values_have_NA mask = np.zeros_like(self, dtype=bool) diff --git a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py index dab9cc313b42f..d3a3434872826 100644 --- a/pandas/tests/series/methods/test_isin.py +++ b/pandas/tests/series/methods/test_isin.py @@ -173,8 +173,8 @@ def test_isin_masked_types(self, dtype, data, values, expected): ser = Series(data, dtype=dtype) result = ser.isin(values) - expected = Series(expected, dtype="boolean") + tm.assert_series_equal(result, expected) From e60c0d43fd7484ba14eef12d98c807e4394d3c6d Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Fri, 9 Jul 2021 19:15:46 -0400 Subject: [PATCH 04/11] Fix benchmark --- asv_bench/benchmarks/algos/isin.py | 39 +++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py index c6db26c0c57df..2427049bdafad 100644 --- a/asv_bench/benchmarks/algos/isin.py +++ b/asv_bench/benchmarks/algos/isin.py @@ -1,7 +1,11 @@ import numpy as np +from pandas.core.dtypes.cast import is_extension_array_dtype + from pandas import ( Categorical, + Float64Dtype, + Int64Dtype, NaT, Series, date_range, @@ -269,7 +273,15 @@ def time_isin(self, series_type, vals_type): class IsInLongSeriesLookUpDominates: params = [ - ["int64", "int32", "float64", "float32", "object", "Int64", "Float64"], + [ + "int64", + "int32", + "float64", + "float32", + "object", + Int64Dtype(), + Float64Dtype(), + ], [5, 1000], ["random_hits", "random_misses", "monotone_hits", "monotone_misses"], ] @@ -288,7 +300,12 @@ def setup(self, dtype, MaxNumber, series_type): array = np.arange(N) + MaxNumber self.series = Series(array).astype(dtype) - self.values = np.arange(MaxNumber).astype(dtype) + + if is_extension_array_dtype(dtype): + vals_dtype = dtype.type + else: + vals_dtype = dtype + self.values = np.arange(MaxNumber).astype(vals_dtype) def time_isin(self, dtypes, MaxNumber, series_type): self.series.isin(self.values) @@ -296,7 +313,15 @@ def time_isin(self, dtypes, MaxNumber, series_type): class IsInLongSeriesValuesDominate: params = [ - ["int64", "int32", "float64", "float32", "object", "Int64", "Float64"], + [ + "int64", + "int32", + "float64", + "float32", + "object", + Int64Dtype(), + Float64Dtype(), + ], ["random", "monotone"], ] param_names = ["dtype", "series_type"] @@ -309,7 +334,13 @@ def setup(self, dtype, series_type): if series_type == "monotone": vals = np.arange(N) - self.values = vals.astype(dtype) + if is_extension_array_dtype(dtype): + vals_dtype = dtype.type + else: + vals_dtype = dtype + + self.values = vals.astype(vals_dtype) + M = 10 ** 6 + 1 self.series = Series(np.arange(M)).astype(dtype) From e9022182222de6d221c8697b05dd236bcf3e4ea9 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Fri, 9 Jul 2021 19:26:40 -0400 Subject: [PATCH 05/11] Fix benchmark --- asv_bench/benchmarks/algos/isin.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py index 2427049bdafad..6359a90c10e15 100644 --- a/asv_bench/benchmarks/algos/isin.py +++ b/asv_bench/benchmarks/algos/isin.py @@ -274,11 +274,11 @@ def time_isin(self, series_type, vals_type): class IsInLongSeriesLookUpDominates: params = [ [ - "int64", - "int32", - "float64", - "float32", - "object", + # "int64", + # "int32", + # "float64", + # "float32", + # "object", Int64Dtype(), Float64Dtype(), ], From 814f978b53b9917da6fe883be60c37bf50739d87 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Fri, 9 Jul 2021 19:27:00 -0400 Subject: [PATCH 06/11] Fix benchmark --- asv_bench/benchmarks/algos/isin.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py index 6359a90c10e15..2427049bdafad 100644 --- a/asv_bench/benchmarks/algos/isin.py +++ b/asv_bench/benchmarks/algos/isin.py @@ -274,11 +274,11 @@ def time_isin(self, series_type, vals_type): class IsInLongSeriesLookUpDominates: params = [ [ - # "int64", - # "int32", - # "float64", - # "float32", - # "object", + "int64", + "int32", + "float64", + "float32", + "object", Int64Dtype(), Float64Dtype(), ], From 0a9b4e6d784d89d354aebdc2b7d86956529f195d Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Fri, 9 Jul 2021 23:30:09 -0400 Subject: [PATCH 07/11] Move has_na, change import --- asv_bench/benchmarks/algos/isin.py | 2 +- pandas/_libs/lib.pyi | 1 - pandas/_libs/lib.pyx | 18 ------------------ pandas/_libs/missing.pyx | 18 ++++++++++++++++++ pandas/core/arrays/masked.py | 2 +- 5 files changed, 20 insertions(+), 21 deletions(-) diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py index 2427049bdafad..99e6ecaac34f8 100644 --- a/asv_bench/benchmarks/algos/isin.py +++ b/asv_bench/benchmarks/algos/isin.py @@ -1,6 +1,6 @@ import numpy as np -from pandas.core.dtypes.cast import is_extension_array_dtype +from pandas.core.dtypes.common import is_extension_array_dtype from pandas import ( Categorical, diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 59c74b72fa196..5be50f16af003 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -221,7 +221,6 @@ def array_equivalent_object( right: np.ndarray, # object[:] ) -> bool: ... def has_infs(arr: np.ndarray) -> bool: ... # const floating[:] -def has_NA(arr: np.ndarray) -> bool: ... def get_reverse_indexer( indexer: np.ndarray, # const intp_t[:] length: int, diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 0e59b9efcd3ec..4ab2497be94d5 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -538,24 +538,6 @@ def has_infs(floating[:] arr) -> bool: return ret -@cython.wraparound(False) -@cython.boundscheck(False) -def has_NA(ndarray arr) -> bool: - """ - Return True if NA present in arr, False otherwise - """ - cdef: - Py_ssize_t i - - assert arr.ndim == 1, "'arr' must be 1-D." - - for i in range(len(arr)): - if arr[i] is C_NA: - return True - - return False - - def maybe_indices_to_slice(ndarray[intp_t] indices, int max_len): cdef: Py_ssize_t i, n = len(indices) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index cbe79d11fbfc9..dafd27dba7c9e 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -334,6 +334,24 @@ def isnaobj2d_old(arr: ndarray) -> ndarray: return result.view(np.bool_) +@cython.wraparound(False) +@cython.boundscheck(False) +def has_NA(ndarray arr) -> bool: + """ + Return True if NA present in arr, False otherwise + """ + cdef: + Py_ssize_t i + + assert arr.ndim == 1, "'arr' must be 1-D." + + for i in range(len(arr)): + if arr[i] is C_NA: + return True + + return False + + def isposinf_scalar(val: object) -> bool: return util.is_float_object(val) and val == INF diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index e75fd79778260..c6cb688ada35f 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -409,7 +409,7 @@ def isin(self, values) -> BooleanArray: # type: ignore[override] result = isin(self._data, values_arr) if self._hasna: - values_have_NA = lib.has_NA(values_arr) + values_have_NA = libmissing.has_NA(values_arr) # For now, NA does not propagate so set result according to presence of NA, # see https://github.com/pandas-dev/pandas/pull/38379 for some discussion From 6a68490cc430615827c449dcbd4e91a622b7c335 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sat, 10 Jul 2021 16:08:36 -0400 Subject: [PATCH 08/11] Only check NA for object --- pandas/_libs/missing.pyx | 4 +--- pandas/core/arrays/masked.py | 4 +++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index dafd27dba7c9e..8fa4ed5eb6215 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -336,15 +336,13 @@ def isnaobj2d_old(arr: ndarray) -> ndarray: @cython.wraparound(False) @cython.boundscheck(False) -def has_NA(ndarray arr) -> bool: +def has_NA(ndarray[object, ndim=1] arr) -> bool: """ Return True if NA present in arr, False otherwise """ cdef: Py_ssize_t i - assert arr.ndim == 1, "'arr' must be 1-D." - for i in range(len(arr)): if arr[i] is C_NA: return True diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index c6cb688ada35f..7255e86e66493 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -409,7 +409,9 @@ def isin(self, values) -> BooleanArray: # type: ignore[override] result = isin(self._data, values_arr) if self._hasna: - values_have_NA = libmissing.has_NA(values_arr) + values_have_NA = is_object_dtype(values_arr.dtype) and libmissing.has_NA( + values_arr + ) # For now, NA does not propagate so set result according to presence of NA, # see https://github.com/pandas-dev/pandas/pull/38379 for some discussion From 44bcd670c121736295633aad772a65b412092fca Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sun, 11 Jul 2021 15:22:23 -0400 Subject: [PATCH 09/11] Update benchmark to make allowed clearer --- asv_bench/benchmarks/algos/isin.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py index 99e6ecaac34f8..ea97993826511 100644 --- a/asv_bench/benchmarks/algos/isin.py +++ b/asv_bench/benchmarks/algos/isin.py @@ -1,7 +1,5 @@ import numpy as np -from pandas.core.dtypes.common import is_extension_array_dtype - from pandas import ( Categorical, Float64Dtype, @@ -301,11 +299,10 @@ def setup(self, dtype, MaxNumber, series_type): self.series = Series(array).astype(dtype) - if is_extension_array_dtype(dtype): - vals_dtype = dtype.type - else: - vals_dtype = dtype - self.values = np.arange(MaxNumber).astype(vals_dtype) + if isinstance(dtype, (Int64Dtype, Float64Dtype)): + dtype = dtype.type + + self.values = np.arange(MaxNumber).astype(dtype) def time_isin(self, dtypes, MaxNumber, series_type): self.series.isin(self.values) @@ -334,12 +331,10 @@ def setup(self, dtype, series_type): if series_type == "monotone": vals = np.arange(N) - if is_extension_array_dtype(dtype): - vals_dtype = dtype.type - else: - vals_dtype = dtype + if isinstance(dtype, (Int64Dtype, Float64Dtype)): + dtype = dtype.type - self.values = vals.astype(vals_dtype) + self.values = vals.astype(dtype) M = 10 ** 6 + 1 self.series = Series(np.arange(M)).astype(dtype) From e7b66b7ef7561fcd4d6306396a9b71d3de17dc56 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sun, 11 Jul 2021 23:58:46 -0400 Subject: [PATCH 10/11] Simpler benchmark fix --- asv_bench/benchmarks/algos/isin.py | 33 ++++-------------------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py index ea97993826511..5d7a76bc01d49 100644 --- a/asv_bench/benchmarks/algos/isin.py +++ b/asv_bench/benchmarks/algos/isin.py @@ -2,8 +2,6 @@ from pandas import ( Categorical, - Float64Dtype, - Int64Dtype, NaT, Series, date_range, @@ -271,15 +269,7 @@ def time_isin(self, series_type, vals_type): class IsInLongSeriesLookUpDominates: params = [ - [ - "int64", - "int32", - "float64", - "float32", - "object", - Int64Dtype(), - Float64Dtype(), - ], + ["int64", "int32", "float64", "float32", "object", "Int64", "Float64"], [5, 1000], ["random_hits", "random_misses", "monotone_hits", "monotone_misses"], ] @@ -299,10 +289,7 @@ def setup(self, dtype, MaxNumber, series_type): self.series = Series(array).astype(dtype) - if isinstance(dtype, (Int64Dtype, Float64Dtype)): - dtype = dtype.type - - self.values = np.arange(MaxNumber).astype(dtype) + self.values = np.arange(MaxNumber).astype(dtype.lower()) def time_isin(self, dtypes, MaxNumber, series_type): self.series.isin(self.values) @@ -310,15 +297,7 @@ def time_isin(self, dtypes, MaxNumber, series_type): class IsInLongSeriesValuesDominate: params = [ - [ - "int64", - "int32", - "float64", - "float32", - "object", - Int64Dtype(), - Float64Dtype(), - ], + ["int64", "int32", "float64", "float32", "object", "Int64", "Float64"], ["random", "monotone"], ] param_names = ["dtype", "series_type"] @@ -331,11 +310,7 @@ def setup(self, dtype, series_type): if series_type == "monotone": vals = np.arange(N) - if isinstance(dtype, (Int64Dtype, Float64Dtype)): - dtype = dtype.type - - self.values = vals.astype(dtype) - + self.values = vals.astype(dtype.lower()) M = 10 ** 6 + 1 self.series = Series(np.arange(M)).astype(dtype) From 17a50b9deceb4ff75681c541d6c01ef859e50831 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Tue, 13 Jul 2021 20:59:50 -0400 Subject: [PATCH 11/11] Handle in python space --- pandas/_libs/missing.pyx | 16 ---------------- pandas/core/arrays/masked.py | 4 ++-- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 8fa4ed5eb6215..cbe79d11fbfc9 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -334,22 +334,6 @@ def isnaobj2d_old(arr: ndarray) -> ndarray: return result.view(np.bool_) -@cython.wraparound(False) -@cython.boundscheck(False) -def has_NA(ndarray[object, ndim=1] arr) -> bool: - """ - Return True if NA present in arr, False otherwise - """ - cdef: - Py_ssize_t i - - for i in range(len(arr)): - if arr[i] is C_NA: - return True - - return False - - def isposinf_scalar(val: object) -> bool: return util.is_float_object(val) and val == INF diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 7255e86e66493..3a152bd5889b7 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -409,8 +409,8 @@ def isin(self, values) -> BooleanArray: # type: ignore[override] result = isin(self._data, values_arr) if self._hasna: - values_have_NA = is_object_dtype(values_arr.dtype) and libmissing.has_NA( - values_arr + values_have_NA = is_object_dtype(values_arr.dtype) and any( + val is self.dtype.na_value for val in values_arr ) # For now, NA does not propagate so set result according to presence of NA,