pandas-dev · kykosic · Mar 21, 2019 · Mar 21, 2019 · Mar 21, 2019 · Mar 21, 2019
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
@@ -29,10 +29,14 @@ class IsIn(object):
     def setup(self, dtype):
         self.s = Series(np.random.randint(1, 10, 100000)).astype(dtype)
         self.values = [1, 2]
+        self.values_set = set(self.values)  # reused by time_isin_set
 
     def time_isin(self, dtypes):
         self.s.isin(self.values)
 
+    def time_isin_set(self, dtypes):
+        self.s.isin(values=self.values_set)  # set input, GH 25507
+
 
 class IsInFloat64(object):
 

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -176,6 +176,7 @@ Performance Improvements
   int8/int16/int32 and the searched key is within the integer bounds for the dtype (:issue:`22034`)
 - Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`)
 - Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`)
+- Improved performance of :meth:`Series.isin` and :meth:`DataFrame.isin` when passing a set (:issue:`25507`)
 - Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`)
 
 .. _whatsnew_0250.bug_fixes:

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -21,9 +21,9 @@
     is_complex_dtype, is_datetime64_any_dtype, is_datetime64tz_dtype,
     is_datetimelike, is_extension_array_dtype, is_float_dtype, is_integer,
     is_integer_dtype, is_interval_dtype, is_list_like, is_numeric_dtype,
-    is_object_dtype, is_period_dtype, is_scalar, is_signed_integer_dtype,
-    is_sparse, is_timedelta64_dtype, is_unsigned_integer_dtype,
-    needs_i8_conversion)
+    is_object_dtype, is_period_dtype, is_scalar, is_set_like,
+    is_signed_integer_dtype, is_sparse, is_timedelta64_dtype,
+    is_unsigned_integer_dtype, needs_i8_conversion)
 from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
 from pandas.core.dtypes.missing import isna, na_value_for_dtype
 
@@ -395,6 +395,14 @@ def isin(comps, values):
                         " to isin(), you passed a [{values_type}]"
                         .format(values_type=type(values).__name__))
 
+    # GH 25507
+    # if `values` is a set, directly use it instead of hashing a list
+    if is_set_like(values):
+        result = np.empty_like(comps, dtype=np.bool)
+        for i, comp in enumerate(comps):
+            result[i] = comp in values
+        return result
+
     if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)):
         values = construct_1d_object_array_from_listlike(list(values))
 

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -17,7 +17,7 @@
     is_array_like, is_bool, is_complex, is_decimal, is_dict_like, is_file_like,
     is_float, is_hashable, is_integer, is_interval, is_iterator, is_list_like,
     is_named_tuple, is_nested_list_like, is_number, is_re, is_re_compilable,
-    is_scalar, is_sequence, is_string_like)
+    is_scalar, is_sequence, is_set_like, is_string_like)
 
 _POSSIBLY_CAST_DTYPES = {np.dtype(t).name
                          for t in ['O', 'int8', 'uint8', 'int16', 'uint16',

diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py
@@ -301,6 +301,36 @@ def is_list_like(obj, allow_sets=True):
             and not (allow_sets is False and isinstance(obj, Set)))
 
 
+def is_set_like(obj):
+    """
+    Check if the object is a built-in set or frozenset instance.
+
+    Parameters
+    ----------
+    obj : object
+
+    Returns
+    -------
+    bool
+        True if `obj` is an instance of ``set`` or ``frozenset``.
+
+    Examples
+    --------
+    >>> is_set_like({1, 2})
+    True
+    >>> is_set_like(frozenset([1, 2]))
+    True
+    >>> is_set_like(set())
+    True
+    >>> is_set_like(set)
+    False
+    >>> is_set_like({1: 2, 3: 4})
+    False
+    """
+    set_types = (set, frozenset)
+    return isinstance(obj, set_types)
+
+
 def is_array_like(obj):
     """
     Check if the object is array-like.

diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
@@ -24,7 +24,7 @@
 from pandas.core.dtypes.common import (
     ensure_categorical, ensure_int32, is_bool, is_datetime64_any_dtype,
     is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype,
-    is_float, is_integer, is_number, is_scalar, is_scipy_sparse,
+    is_float, is_integer, is_number, is_scalar, is_scipy_sparse, is_set_like,
     is_timedelta64_dtype, is_timedelta64_ns_dtype)
 
 import pandas as pd
@@ -103,6 +103,22 @@ def test_is_list_like_disallow_sets(maybe_list_like):
     assert inference.is_list_like(obj, allow_sets=False) == expected
 
 
+@pytest.mark.parametrize('obj,expected', [
+    # set and frozenset instances, empty or not, are set-like
+    ({1, 2}, True), (set(), True),
+    (frozenset([1, 2]), True), (frozenset(), True),
+    # the types themselves are not instances
+    (set, False), (frozenset, False),
+    # other containers, including dicts, are not set-like
+    ({1: 2}, False),
+    ([1, 2], False),
+    ([], False),
+    ((1, 2), False),
+])
+def test_is_set_like(obj, expected):
+    assert is_set_like(obj) == expected
+
+
 def test_is_sequence():
     is_seq = inference.is_sequence
     assert (is_seq((1, 2)))

diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
@@ -803,6 +803,21 @@ def test_different_nans_as_float64(self):
         expected = np.array([True, True])
         tm.assert_numpy_array_equal(result, expected)
 
+    @pytest.mark.parametrize("comps,values,expected", [
+        (np.array([1, 2, 3]), {1, np.int64(2)}, np.array([True, True, False])),
+        (np.array(['a', 'b']), {1, 'b'}, np.array([False, True])),
+        (np.array([1.0, 2.0]), {1, 2}, np.array([True, True])),
+        (pd.date_range("2019-01-01", "2019-01-03"),
+         {datetime(2019, 1, 2)}, np.array([False, True, False])),
+        (pd.Categorical(['a', 'b']), {0, 'b'}, np.array([False, True])),
+        (np.array([np.nan, float('nan')]), {float('nan')},
+         np.array([False, False]))
+    ])
+    def test_set(self, comps, values, expected):
+        # GH 25507: a set passed as `values` is used for membership directly.
+        # The helper raises on mismatch itself; do not assert its return.
+        tm.assert_numpy_array_equal(algos.isin(comps, values), expected)
+
 
 class TestValueCounts(object):