Skip to content

Commit b874eed

Browse files
committed
BUG/API: FutureWarning from Categorical.take indices
We're changing how Categorical.take handles negative indices to be in line with Series and other EAs.
1 parent 644138f commit b874eed

File tree

5 files changed

+99
-8
lines changed

5 files changed

+99
-8
lines changed

doc/source/whatsnew/v0.23.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -856,6 +856,7 @@ Other API Changes
856856
- Constructing a Series from a list of length 1 no longer broadcasts this list when a longer index is specified (:issue:`19714`, :issue:`20391`).
857857
- :func:`DataFrame.to_dict` with ``orient='index'`` no longer casts int columns to float for a DataFrame with only int and float columns (:issue:`18580`)
858858
- A user-defined-function that is passed to :func:`Series.rolling().aggregate() <pandas.core.window.Rolling.aggregate>`, :func:`DataFrame.rolling().aggregate() <pandas.core.window.Rolling.aggregate>`, or its expanding cousins, will now *always* be passed a ``Series``, rather than a ``np.array``; ``.apply()`` only has the ``raw`` keyword, see :ref:`here <whatsnew_0230.enhancements.window_raw>`. This is consistent with the signatures of ``.aggregate()`` across pandas (:issue:`20584`)
859+
- The previous default behavior of negative indices in ``Categorical.take`` is deprecated. In a future version it will change from meaning missing values to meaning positional indicies from the right (:issue:`20664`)
859860

860861
.. _whatsnew_0230.deprecations:
861862

pandas/core/arrays/categorical.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import numpy as np
44
from warnings import warn
5+
import textwrap
56
import types
67

78
from pandas import compat
@@ -29,7 +30,7 @@
2930
is_scalar,
3031
is_dict_like)
3132

32-
from pandas.core.algorithms import factorize, take_1d, unique1d
33+
from pandas.core.algorithms import factorize, take_1d, unique1d, take
3334
from pandas.core.accessor import PandasDelegate
3435
from pandas.core.base import (PandasObject,
3536
NoNewAttributesMixin, _shared_docs)
@@ -48,6 +49,17 @@
4849
from .base import ExtensionArray
4950

5051

52+
_take_msg = textwrap.dedent("""\
53+
Interpreting negative values in 'indexer' as missing values.
54+
In the future, this will change to meaning positional indicies
55+
from the right.
56+
57+
Use 'allow_fill=True' to retain the previous behavior and silence this
58+
warning.
59+
60+
Use 'allow_fill=False' to accept the new behavior.""")
61+
62+
5163
def _cat_compare_op(op):
5264
def f(self, other):
5365
# On python2, you can usually compare any type to any type, and
@@ -1732,17 +1744,25 @@ def fillna(self, value=None, method=None, limit=None):
17321744
return self._constructor(values, categories=self.categories,
17331745
ordered=self.ordered, fastpath=True)
17341746

1735-
def take_nd(self, indexer, allow_fill=True, fill_value=None):
1736-
""" Take the codes by the indexer, fill with the fill_value.
1747+
def take_nd(self, indexer, allow_fill=None, fill_value=None):
1748+
"""
1749+
Return the indices
17371750
17381751
For internal compatibility with numpy arrays.
17391752
"""
1753+
indexer = np.asarray(indexer, dtype=np.intp)
1754+
if allow_fill is None:
1755+
if (indexer < 0).any():
1756+
warn(_take_msg, FutureWarning, stacklevel=2)
1757+
allow_fill = True
17401758

1741-
# filling must always be None/nan here
1742-
# but is passed thru internally
1743-
assert isna(fill_value)
1759+
if fill_value is None or isna(fill_value):
1760+
# For backwards compatability, we have to override
1761+
# any na values for `fill_value`
1762+
fill_value = -1
17441763

1745-
codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1)
1764+
codes = take(self._codes, indexer, allow_fill=allow_fill,
1765+
fill_value=fill_value)
17461766
result = self._constructor(codes, categories=self.categories,
17471767
ordered=self.ordered, fastpath=True)
17481768
return result

pandas/core/series.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3499,7 +3499,14 @@ def _take(self, indices, axis=0, convert=True, is_copy=False):
34993499

35003500
indices = _ensure_platform_int(indices)
35013501
new_index = self.index.take(indices)
3502-
new_values = self._values.take(indices)
3502+
3503+
if is_categorical_dtype(self):
3504+
# https://github.com/pandas-dev/pandas/issues/20664
3505+
# TODO: remove when the default Categorical.take behavior changes
3506+
kwargs = {'allow_fill': False}
3507+
else:
3508+
kwargs = {}
3509+
new_values = self._values.take(indices, **kwargs)
35033510

35043511
result = (self._constructor(new_values, index=new_index,
35053512
fastpath=True).__finalize__(self))

pandas/tests/categorical/test_algos.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,17 @@
55
import pandas.util.testing as tm
66

77

8+
@pytest.fixture(params=[True, False])
9+
def allow_fill(request):
10+
"""Boolean 'allow_fill' parameter for Categorical.take"""
11+
return request.param
12+
13+
14+
@pytest.fixture(params=[True, False])
15+
def ordered(request):
16+
"""Boolean 'ordered' parameter for Categorical."""
17+
return request.param
18+
819
@pytest.mark.parametrize('ordered', [True, False])
920
@pytest.mark.parametrize('categories', [
1021
['b', 'a', 'c'],
@@ -69,3 +80,45 @@ def test_isin_empty(empty):
6980

7081
result = s.isin(empty)
7182
tm.assert_numpy_array_equal(expected, result)
83+
84+
85+
class TestTake(object):
86+
# https://github.com/pandas-dev/pandas/issues/20664
87+
88+
def test_take_warns(self):
89+
cat = pd.Categorical(['a', 'b'])
90+
with tm.assert_produces_warning(FutureWarning):
91+
cat.take([0, -1])
92+
93+
def test_take_positive_no_warning(self):
94+
cat = pd.Categorical(['a', 'b'])
95+
with tm.assert_produces_warning(None):
96+
cat.take([0, 0])
97+
98+
def test_take_bounds(self, allow_fill):
99+
# https://github.com/pandas-dev/pandas/issues/20664
100+
cat = pd.Categorical(['a', 'b', 'a'])
101+
with pytest.raises(IndexError):
102+
cat.take([4, 5], allow_fill=allow_fill)
103+
104+
def test_take_empty(self, allow_fill):
105+
# https://github.com/pandas-dev/pandas/issues/20664
106+
cat = pd.Categorical([], categories=['a', 'b'])
107+
with pytest.raises(IndexError):
108+
cat.take([0], allow_fill=allow_fill)
109+
110+
def test_positional_take(self, ordered):
111+
cat = pd.Categorical(['a', 'a', 'b', 'b'], categories=['b', 'a'],
112+
ordered=ordered)
113+
result = cat.take([0, 1, 2], allow_fill=False)
114+
expected = pd.Categorical(['a', 'a', 'b'], categories=cat.categories,
115+
ordered=ordered)
116+
tm.assert_categorical_equal(result, expected)
117+
118+
def test_positional_take_unobserved(self, ordered):
119+
cat = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'],
120+
ordered=ordered)
121+
result = cat.take([1, 0], allow_fill=False)
122+
expected = pd.Categorical(['b', 'a'], categories=cat.categories,
123+
ordered=ordered)
124+
tm.assert_categorical_equal(result, expected)

pandas/tests/series/indexing/test_indexing.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -753,6 +753,16 @@ def test_take():
753753
s.take([-1, 3, 4], convert=False)
754754

755755

756+
def test_take_categorical():
757+
# https://github.com/pandas-dev/pandas/issues/20664
758+
s = Series(pd.Categorical(['a', 'b', 'c']))
759+
result = s.take([-2, -2, 0])
760+
expected = Series(pd.Categorical(['b', 'b', 'a'],
761+
categories=['a', 'b', 'c']),
762+
index=[1, 1, 0])
763+
assert_series_equal(result, expected)
764+
765+
756766
def test_head_tail(test_data):
757767
assert_series_equal(test_data.series.head(), test_data.series[:5])
758768
assert_series_equal(test_data.series.head(0), test_data.series[0:0])

0 commit comments

Comments
 (0)