diff --git a/doc/source/api.rst b/doc/source/api.rst index e964ce569532a..9cf10d3f0780d 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -451,6 +451,7 @@ Indexing, iteration DataFrame.pop DataFrame.tail DataFrame.xs + DataFrame.isin Binary operator functions ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/release.rst b/doc/source/release.rst index 00aba51eac37e..80cd935bc67e9 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -148,6 +148,9 @@ pandas 0.13 behavior. - ``DataFrame.update()`` no longer raises a ``DataConflictError``, it now will raise a ``ValueError`` instead (if necessary) (:issue:`4732`) + - ``Series.isin()`` and ``DataFrame.isin()`` now raise a ``TypeError`` when + passed a string (:issue:`4763`). Pass a ``list`` of one element (containing + the string) instead. **Internal Refactoring** diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0cd9f7f3f5330..8c6e7697f8ea1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4609,6 +4609,11 @@ def isin(self, values, iloc=False): else: + if not com.is_list_like(values): + raise TypeError("only list-like or dict-like objects are" + " allowed to be passed to DataFrame.isin(), " + "you passed a " + "{0!r}".format(type(values).__name__)) return DataFrame(lib.ismember(self.values.ravel(), set(values)).reshape(self.shape), self.index, diff --git a/pandas/core/series.py b/pandas/core/series.py index 1160f85751aee..5579e60ceb90e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2805,17 +2805,47 @@ def take(self, indices, axis=0, convert=True): def isin(self, values): """ - Return boolean vector showing whether each element in the Series is - exactly contained in the passed sequence of values + Return a boolean :class:`~pandas.Series` showing whether each element in + the :class:`~pandas.Series` is exactly contained in the passed sequence of + ``values``. Parameters ---------- - values : sequence + values : list-like + The sequence of values to test. 
Passing in a single string will + raise a ``TypeError``: + + .. code-block:: python + + from pandas import Series + s = Series(list('abc')) + s.isin('a') + + Instead, turn a single string into a ``list`` of one element: + + .. code-block:: python + + from pandas import Series + s = Series(list('abc')) + s.isin(['a']) Returns ------- - isin : Series (boolean dtype) + isin : Series (bool dtype) + + Raises + ------ + TypeError + * If ``values`` is a string + + See Also + -------- + pandas.DataFrame.isin """ + if not com.is_list_like(values): + raise TypeError("only list-like objects are allowed to be passed" + " to Series.isin(), you passed a " + "{0!r}".format(type(values).__name__)) value_set = set(values) result = lib.ismember(_values_from_object(self), value_set) return self._constructor(result, self.index, name=self.name) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index c39634281ebb7..b4ec36ac5f29e 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10915,6 +10915,16 @@ def test_isin_dict(self): expected.iloc[0, 0] = True assert_frame_equal(result, expected) + def test_isin_with_string_scalar(self): + #GH4763 + df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'], + 'ids2': ['a', 'n', 'c', 'n']}, + index=['foo', 'bar', 'baz', 'qux']) + with tm.assertRaises(TypeError): + df.isin('a') + + with tm.assertRaises(TypeError): + df.isin('aaa') if __name__ == '__main__': # unittest.main() diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 514245e82ac28..556973acdcb95 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -4433,6 +4433,16 @@ def test_isin(self): expected = Series([True, False, True, False, False, False, True, True]) assert_series_equal(result, expected) + def test_isin_with_string_scalar(self): + #GH4763 + s = Series(['A', 'B', 'C', 'a', 'B', 'B', 'A', 'C']) + with tm.assertRaises(TypeError): + s.isin('a') + + with tm.assertRaises(TypeError): + 
s = Series(['aaa', 'b', 'c']) + s.isin('aaa') + def test_fillna_int(self): s = Series(np.random.randint(-100, 100, 50)) s.fillna(method='ffill', inplace=True)