Skip to content

Commit 1564d4f

Browse files
Change True to None; disallow True
1 parent 27b5b16 commit 1564d4f

File tree

16 files changed

+217
-258
lines changed

16 files changed

+217
-258
lines changed

doc/source/whatsnew/v0.24.1.rst

Lines changed: 8 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -20,74 +20,24 @@ including other versions of pandas.
2020
API Changes
2121
~~~~~~~~~~~
2222

23-
Changing the ``sort`` parameter for :meth:`Index.union`
24-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
23+
Changing the ``sort`` parameter for :class:`Index` set operations
24+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2525

2626
The default ``sort`` value for :meth:`Index.union` has changed from ``True`` to ``None`` (:issue:`24959`).
27-
The default *behavior* remains the same: The result is sorted, unless
27+
The default *behavior*, however, remains the same: the result is sorted, unless
2828

2929
1. ``self`` and ``other`` are identical
3030
2. ``self`` or ``other`` is empty
3131
3. ``self`` or ``other`` contains values that cannot be compared (a ``RuntimeWarning`` is issued).
3232

33-
This allows ``sort=True`` to now mean "always sort". A ``TypeError`` is raised if the values cannot be compared.
34-
35-
**Behavior in 0.24.0**
36-
37-
.. ipython:: python
38-
39-
In [1]: idx = pd.Index(['b', 'a'])
40-
41-
In [2]: idx.union(idx) # sort=True was the default.
42-
Out[2]: Index(['b', 'a'], dtype='object')
43-
44-
In [3]: idx.union(idx, sort=True) # result is still not sorted.
45-
Out[32]: Index(['b', 'a'], dtype='object')
46-
47-
**New Behavior**
48-
49-
.. ipython:: python
50-
51-
idx = pd.Index(['b', 'a'])
52-
idx.union(idx) # sort=None is the default. Don't sort identical operands.
53-
54-
idx.union(idx, sort=True)
33+
This change makes it possible for ``sort=True`` to mean "always sort" in a future release.
5534

5635
The same change applies to :meth:`Index.difference` and :meth:`Index.symmetric_difference`, which
57-
would previously not sort the result when ``sort=True`` but the values could not be compared.
58-
59-
Changed the behavior of :meth:`Index.intersection` with ``sort=True``
60-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
61-
62-
When ``sort=True`` is provided to :meth:`Index.intersection`, the values are always sorted. In 0.24.0,
63-
the values would not be sorted when ``self`` and ``other`` were identical. Pass ``sort=False`` to not
64-
sort the values. This matches the behavior of pandas 0.23.4 and earlier.
65-
66-
**Behavior in 0.23.4**
67-
68-
.. ipython:: python
69-
70-
In [2]: idx = pd.Index(['b', 'a'])
71-
72-
In [3]: idx.intersection(idx) # sort was not a keyword.
73-
Out[3]: Index(['b', 'a'], dtype='object')
74-
75-
**Behavior in 0.24.0**
76-
77-
.. ipython:: python
78-
79-
In [5]: idx.intersection(idx) # sort=True by default. Don't sort identical.
80-
Out[5]: Index(['b', 'a'], dtype='object')
81-
82-
In [6]: idx.intersection(idx, sort=True)
83-
Out[6]: Index(['b', 'a'], dtype='object')
84-
85-
**New Behavior**
86-
87-
.. ipython:: python
36+
would not sort the result when the values could not be compared.
8837

89-
idx.intersection(idx) # sort=False by default
90-
idx.intersection(idx, sort=True)
38+
For :meth:`Index.intersection` the option of ``sort=True`` is also renamed
39+
to ``sort=None`` (but for :meth:`Index.intersection` it is not the default), as
40+
the result is not sorted when ``self`` and ``other`` are identical.
9141

9242
.. _whatsnew_0241.regressions:
9343

pandas/_libs/lib.pyx

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,6 @@ def fast_unique_multiple(list arrays, sort: bool=True):
239239
except Exception:
240240
# TODO: RuntimeWarning?
241241
pass
242-
elif sort:
243-
uniques.sort()
244242

245243
return uniques
246244

pandas/core/indexes/base.py

Lines changed: 22 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2245,6 +2245,11 @@ def _get_reconciled_name_object(self, other):
22452245
return self._shallow_copy(name=name)
22462246
return self
22472247

2248+
def _validate_sort_keyword(self, sort):
2249+
if sort not in [None, False]:
2250+
raise ValueError("The 'sort' keyword only takes the values of "
2251+
"None or False; {0} was passed.".format(sort))
2252+
22482253
def union(self, other, sort=None):
22492254
"""
22502255
Form the union of two Index objects.
@@ -2262,16 +2267,14 @@ def union(self, other, sort=None):
22622267
3. Some values in `self` or `other` cannot be compared.
22632268
A RuntimeWarning is issued in this case.
22642269
2265-
* True : sort the result. A TypeError is raised when the
2266-
values cannot be compared.
22672270
* False : do not sort the result.
22682271
22692272
.. versionadded:: 0.24.0
22702273
22712274
.. versionchanged:: 0.24.1
22722275
2273-
Changed the default `sort` to None, matching the
2274-
behavior of pandas 0.23.4 and earlier.
2276+
Changed the default `sort` from True to None (without
2277+
change in behavior).
22752278
22762279
Returns
22772280
-------
@@ -2285,20 +2288,15 @@ def union(self, other, sort=None):
22852288
>>> idx1.union(idx2)
22862289
Int64Index([1, 2, 3, 4, 5, 6], dtype='int64')
22872290
"""
2291+
self._validate_sort_keyword(sort)
22882292
self._assert_can_do_setop(other)
22892293
other = ensure_index(other)
22902294

22912295
if len(other) == 0 or self.equals(other):
2292-
result = self._get_reconciled_name_object(other)
2293-
if sort:
2294-
result = result.sort_values()
2295-
return result
2296+
return self._get_reconciled_name_object(other)
22962297

22972298
if len(self) == 0:
2298-
result = other._get_reconciled_name_object(self)
2299-
if sort:
2300-
result = result.sort_values()
2301-
return result
2299+
return other._get_reconciled_name_object(self)
23022300

23032301
# TODO: is_dtype_union_equal is a hack around
23042302
# 1. buggy set ops with duplicates (GH #13432)
@@ -2348,9 +2346,6 @@ def union(self, other, sort=None):
23482346
warnings.warn("{}, sort order is undefined for "
23492347
"incomparable objects".format(e),
23502348
RuntimeWarning, stacklevel=3)
2351-
elif sort:
2352-
# raise if not sortable.
2353-
result = sorting.safe_sort(result)
23542349

23552350
# for subclasses
23562351
return self._wrap_setop_result(other, result)
@@ -2367,12 +2362,12 @@ def intersection(self, other, sort=False):
23672362
Parameters
23682363
----------
23692364
other : Index or array-like
2370-
sort : bool or None, default False
2365+
sort : False or None, default False
23712366
Whether to sort the resulting index.
23722367
23732368
* False : do not sort the result.
2374-
* True : sort the result. A TypeError is raised when the
2375-
values cannot be compared.
2369+
* None : sort the result, except when `self` and `other` are equal
2370+
or when the values cannot be compared.
23762371
23772372
.. versionadded:: 0.24.0
23782373
@@ -2392,14 +2387,12 @@ def intersection(self, other, sort=False):
23922387
>>> idx1.intersection(idx2)
23932388
Int64Index([3, 4], dtype='int64')
23942389
"""
2390+
self._validate_sort_keyword(sort)
23952391
self._assert_can_do_setop(other)
23962392
other = ensure_index(other)
23972393

23982394
if self.equals(other):
2399-
result = self._get_reconciled_name_object(other)
2400-
if sort:
2401-
result = result.sort_values()
2402-
return result
2395+
return self._get_reconciled_name_object(other)
24032396

24042397
if not is_dtype_equal(self.dtype, other.dtype):
24052398
this = self.astype('O')
@@ -2434,7 +2427,7 @@ def intersection(self, other, sort=False):
24342427

24352428
taken = other.take(indexer)
24362429

2437-
if sort:
2430+
if sort is None:
24382431
taken = sorting.safe_sort(taken.values)
24392432
if self.name != other.name:
24402433
name = None
@@ -2457,22 +2450,20 @@ def difference(self, other, sort=None):
24572450
Parameters
24582451
----------
24592452
other : Index or array-like
2460-
sort : bool or None, default None
2453+
sort : False or None, default None
24612454
Whether to sort the resulting index. By default, the
24622455
values are attempted to be sorted, but any TypeError from
24632456
incomparable elements is caught by pandas.
24642457
24652458
* None : Attempt to sort the result, but catch any TypeErrors
24662459
from comparing incomparable elements.
24672460
* False : Do not sort the result.
2468-
* True : Sort the result, raising a TypeError if any elements
2469-
cannot be compared.
24702461
24712462
.. versionadded:: 0.24.0
24722463
24732464
.. versionchanged:: 0.24.1
24742465
2475-
Added the `None` option, which matches the behavior of
2466+
Changed `True` to `None`, which matches the behavior of
24762467
pandas 0.23.4 and earlier.
24772468
24782469
Returns
@@ -2489,6 +2480,7 @@ def difference(self, other, sort=None):
24892480
>>> idx1.difference(idx2, sort=False)
24902481
Int64Index([2, 1], dtype='int64')
24912482
"""
2483+
self._validate_sort_keyword(sort)
24922484
self._assert_can_do_setop(other)
24932485

24942486
if self.equals(other):
@@ -2510,8 +2502,6 @@ def difference(self, other, sort=None):
25102502
the_diff = sorting.safe_sort(the_diff)
25112503
except TypeError:
25122504
pass
2513-
elif sort:
2514-
the_diff = sorting.safe_sort(the_diff)
25152505

25162506
return this._shallow_copy(the_diff, name=result_name, freq=None)
25172507

@@ -2523,22 +2513,20 @@ def symmetric_difference(self, other, result_name=None, sort=None):
25232513
----------
25242514
other : Index or array-like
25252515
result_name : str
2526-
sort : bool or None, default None
2516+
sort : False or None, default None
25272517
Whether to sort the resulting index. By default, the
25282518
values are attempted to be sorted, but any TypeError from
25292519
incomparable elements is caught by pandas.
25302520
25312521
* None : Attempt to sort the result, but catch any TypeErrors
25322522
from comparing incomparable elements.
25332523
* False : Do not sort the result.
2534-
* True : Sort the result, raising a TypeError if any elements
2535-
cannot be compared.
25362524
25372525
.. versionadded:: 0.24.0
25382526
25392527
.. versionchanged:: 0.24.1
25402528
2541-
Added the `None` option, which matches the behavior of
2529+
Changed `True` to `None`, which matches the behavior of
25422530
pandas 0.23.4 and earlier.
25432531
25442532
Returns
@@ -2564,6 +2552,7 @@ def symmetric_difference(self, other, result_name=None, sort=None):
25642552
>>> idx1 ^ idx2
25652553
Int64Index([1, 5], dtype='int64')
25662554
"""
2555+
self._validate_sort_keyword(sort)
25672556
self._assert_can_do_setop(other)
25682557
other, result_name_update = self._convert_can_do_setop(other)
25692558
if result_name is None:
@@ -2589,8 +2578,6 @@ def symmetric_difference(self, other, result_name=None, sort=None):
25892578
the_diff = sorting.safe_sort(the_diff)
25902579
except TypeError:
25912580
pass
2592-
elif sort:
2593-
the_diff = sorting.safe_sort(the_diff)
25942581

25952582
attribs = self._get_attributes_dict()
25962583
attribs['name'] = result_name

pandas/core/indexes/datetimes.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -602,19 +602,21 @@ def intersection(self, other, sort=False):
602602
Parameters
603603
----------
604604
other : DatetimeIndex or array-like
605-
sort : bool, default True
605+
sort : False or None, default False
606606
Sort the resulting index if possible.
607607
608608
.. versionadded:: 0.24.0
609609
610610
.. versionchanged:: 0.24.1
611611
612-
Changed the default from ``True`` to ``False``.
612+
Changed the default to ``False`` to match the behavior
613+
from before 0.24.0.
613614
614615
Returns
615616
-------
616617
y : Index or DatetimeIndex
617618
"""
619+
self._validate_sort_keyword(sort)
618620
self._assert_can_do_setop(other)
619621

620622
if self.equals(other):

pandas/core/indexes/interval.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1093,7 +1093,7 @@ def equals(self, other):
10931093
def overlaps(self, other):
10941094
return self._data.overlaps(other)
10951095

1096-
def _setop(op_name, sort=True):
1096+
def _setop(op_name, sort=None):
10971097
def func(self, other, sort=sort):
10981098
other = self._as_like_interval_index(other)
10991099

0 commit comments

Comments
 (0)