Skip to content

Commit 27b5b16

Browse files
committed
index multi
1 parent b15dc7e commit 27b5b16

File tree

5 files changed

+70
-13
lines changed

5 files changed

+70
-13
lines changed

doc/source/whatsnew/v0.24.1.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ This allows ``sort=True`` to now mean "always sort". A ``TypeError`` is raised i
5353
5454
idx.union(idx, sort=True)
5555
56-
The same change applies to :meth:`Index.symmetric_difference`, which would previously not
57-
sort the result when ``sort=True`` but the values could not be compared.
56+
The same change applies to :meth:`Index.difference` and :meth:`Index.symmetric_difference`, which
57+
previously left the result unsorted when ``sort=True`` was passed but the values could not be compared.
5858

5959
Changed the behavior of :meth:`Index.intersection` with ``sort=True``
6060
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

pandas/core/indexes/base.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2447,7 +2447,7 @@ def intersection(self, other, sort=False):
24472447

24482448
return taken
24492449

2450-
def difference(self, other, sort=True):
2450+
def difference(self, other, sort=None):
24512451
"""
24522452
Return a new Index with elements from the index that are not in
24532453
`other`.
@@ -2457,11 +2457,24 @@ def difference(self, other, sort=True):
24572457
Parameters
24582458
----------
24592459
other : Index or array-like
2460-
sort : bool, default True
2461-
Sort the resulting index if possible
2460+
sort : bool or None, default None
2461+
Whether to sort the resulting index. By default, pandas
2462+
attempts to sort the values, but any TypeError raised by
2463+
incomparable elements is caught and the result is left unsorted.
2464+
2465+
* None : Attempt to sort the result, but catch any TypeErrors
2466+
from comparing incomparable elements.
2467+
* False : Do not sort the result.
2468+
* True : Sort the result, raising a TypeError if any elements
2469+
cannot be compared.
24622470
24632471
.. versionadded:: 0.24.0
24642472
2473+
.. versionchanged:: 0.24.1
2474+
2475+
Added the `None` option, which matches the behavior of
2476+
pandas 0.23.4 and earlier.
2477+
24652478
Returns
24662479
-------
24672480
difference : Index
@@ -2492,11 +2505,13 @@ def difference(self, other, sort=True):
24922505
label_diff = np.setdiff1d(np.arange(this.size), indexer,
24932506
assume_unique=True)
24942507
the_diff = this.values.take(label_diff)
2495-
if sort:
2508+
if sort is None:
24962509
try:
24972510
the_diff = sorting.safe_sort(the_diff)
24982511
except TypeError:
24992512
pass
2513+
elif sort:
2514+
the_diff = sorting.safe_sort(the_diff)
25002515

25012516
return this._shallow_copy(the_diff, name=result_name, freq=None)
25022517

@@ -2508,7 +2523,7 @@ def symmetric_difference(self, other, result_name=None, sort=None):
25082523
----------
25092524
other : Index or array-like
25102525
result_name : str
2511-
sort : bool or None, default None.
2526+
sort : bool or None, default None
25122527
Whether to sort the resulting index. By default, pandas
25132528
attempts to sort the values, but any TypeError raised by
25142529
incomparable elements is caught.

pandas/core/indexes/multi.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2973,7 +2973,7 @@ def intersection(self, other, sort=False):
29732973
return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0,
29742974
names=result_names)
29752975

2976-
def difference(self, other, sort=True):
2976+
def difference(self, other, sort=None):
29772977
"""
29782978
Compute set difference of two MultiIndex objects
29792979
@@ -2993,6 +2993,8 @@ def difference(self, other, sort=True):
29932993
other, result_names = self._convert_can_do_setop(other)
29942994

29952995
if len(other) == 0:
2996+
if sort:
2997+
return self.sort_values()
29962998
return self
29972999

29983000
if self.equals(other):

pandas/tests/indexes/multi/test_set_ops.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,10 @@ def test_difference(idx, sort):
174174

175175
# name from empty array
176176
result = first.difference([], sort=sort)
177-
assert first.equals(result)
177+
if sort:
178+
assert first.sort_values().equals(result)
179+
else:
180+
assert first.equals(result)
178181
assert first.names == result.names
179182

180183
# name from non-empty array
@@ -189,6 +192,36 @@ def test_difference(idx, sort):
189192
first.difference([1, 2, 3, 4, 5], sort=sort)
190193

191194

195+
def test_difference_sort_special():
196+
idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']])
197+
# sort=None, the default
198+
result = idx.difference([])
199+
tm.assert_index_equal(result, idx)
200+
201+
result = idx.difference([], sort=True)
202+
expected = pd.MultiIndex.from_product([[0, 1], ['a', 'b']])
203+
tm.assert_index_equal(result, expected)
204+
205+
206+
def test_difference_sort_incomparable():
207+
idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000'), 2],
208+
['a', 'b']])
209+
210+
other = pd.MultiIndex.from_product([[3, pd.Timestamp('2000'), 4],
211+
['c', 'd']])
212+
# sort=None, the default
213+
# result = idx.difference(other)
214+
# tm.assert_index_equal(result, idx)
215+
216+
# sort=False
217+
result = idx.difference(other)
218+
tm.assert_index_equal(result, idx)
219+
220+
# sort=True, raises
221+
with pytest.raises(TypeError):
222+
idx.difference(other, sort=True)
223+
224+
192225
@pytest.mark.parametrize("sort", [True, False])
193226
def test_union(idx, sort):
194227
piece1 = idx[:5][::-1]

pandas/tests/indexes/test_base.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from collections import defaultdict
44
from datetime import datetime, timedelta
55
import math
6+
import operator
67
import sys
78

89
import numpy as np
@@ -1099,22 +1100,28 @@ def test_symmetric_difference(self, sort):
10991100
assert tm.equalContents(result, expected)
11001101
assert result.name is None
11011102

1102-
def test_symmetric_difference_incomparable(self):
1103+
@pytest.mark.parametrize('opname', ['difference', 'symmetric_difference'])
1104+
def test_difference_incomparable(self, opname):
11031105
a = pd.Index([3, pd.Timestamp('2000'), 1])
11041106
b = pd.Index([2, pd.Timestamp('1999'), 1])
1107+
op = operator.methodcaller(opname, b)
11051108

11061109
# sort=None, the default
1107-
result = a.symmetric_difference(b)
1110+
result = op(a)
11081111
expected = pd.Index([3, pd.Timestamp('2000'), 2, pd.Timestamp('1999')])
1112+
if opname == 'difference':
1113+
expected = expected[:2]
11091114
tm.assert_index_equal(result, expected)
11101115

11111116
# sort=False
1112-
result = a.symmetric_difference(b, sort=False)
1117+
op = operator.methodcaller(opname, b, sort=False)
1118+
result = op(a)
11131119
tm.assert_index_equal(result, expected)
11141120

11151121
# sort=True, raises
1122+
op = operator.methodcaller(opname, b, sort=True)
11161123
with pytest.raises(TypeError, match='Cannot compare'):
1117-
a.symmetric_difference(b, sort=True)
1124+
op(a)
11181125

11191126
@pytest.mark.parametrize("sort", [True, False])
11201127
def test_symmetric_difference_mi(self, sort):

0 commit comments

Comments
 (0)