Skip to content

Commit 3ef2a69

Browse files
committed
Use _shallow_copy(). Fix issue in Index.difference. Make tests try variety of index types.
1 parent aad5884 commit 3ef2a69

File tree

10 files changed

+215
-122
lines changed

10 files changed

+215
-122
lines changed

doc/source/whatsnew/v0.23.0.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -837,7 +837,8 @@ Indexing
837837
- Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`)
838838
- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`)
839839
- Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (:issue:`19726`)
840-
- Bug in :func:`Index.union` where resulting names were not computed correctly for certain cases (:issue:`9943`, :issue:`9862`)
840+
- Bug in :func:`Index.union` and :func:`Index.intersection` where the name of the resulting ``Index`` was not computed correctly in certain cases (:issue:`9943`, :issue:`9862`)
841+
- Bug in :func:`Index.difference` where the type of the index was not preserved when taking the difference of an ``Index`` with itself
841842

842843

843844
MultiIndex

pandas/core/indexes/base.py

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1240,9 +1240,9 @@ def set_names(self, names, level=None, inplace=False):
12401240
Examples
12411241
--------
12421242
>>> Index([1, 2, 3, 4]).set_names('foo')
1243-
Int64Index([1, 2, 3, 4], dtype='int64')
1243+
Int64Index([1, 2, 3, 4], dtype='int64', name='foo')
12441244
>>> Index([1, 2, 3, 4]).set_names(['foo'])
1245-
Int64Index([1, 2, 3, 4], dtype='int64')
1245+
Int64Index([1, 2, 3, 4], dtype='int64', name='foo')
12461246
>>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'),
12471247
(2, u'one'), (2, u'two')],
12481248
names=['foo', 'bar'])
@@ -2263,17 +2263,37 @@ def __or__(self, other):
22632263
def __xor__(self, other):
22642264
return self.symmetric_difference(other)
22652265

2266-
def _get_setop_name_object(self, other):
2266+
def _get_reconciled_name_object(self, other):
22672267
"""
2268-
Given 2 indexes, give a setop name and object, meaning
2269-
we use get_op_result_name to return the name, and then
2270-
return a new object if we are resetting the name
2268+
If the result of a set operation will be self,
2269+
return self, unless the name changes, in which
2270+
case make a shallow copy of self.
22712271
"""
22722272
name = get_op_result_name(self, other)
22732273
if self.name != name:
22742274
return self._shallow_copy(name=name)
22752275
return self
22762276

2277+
def _union_corner_case(self, other):
2278+
"""
2279+
If self or other have no length, or self and other
2280+
are the same, then return self, after reconciling names
2281+
2282+
Returns
2283+
-------
2284+
Tuple of (is_corner, result), where is_corner is True if
2285+
it is a corner case, and result is the reconciled result
2286+
2287+
"""
2288+
# GH 9943 9862
2289+
if len(other) == 0 or self.equals(other):
2290+
return (True, self._get_reconciled_name_object(other))
2291+
2292+
if len(self) == 0:
2293+
return (True, other._get_reconciled_name_object(self))
2294+
2295+
return (False, None)
2296+
22772297
def union(self, other):
22782298
"""
22792299
Form the union of two Index objects and sorts if possible.
@@ -2298,11 +2318,9 @@ def union(self, other):
22982318
self._assert_can_do_setop(other)
22992319
other = _ensure_index(other)
23002320

2301-
if len(other) == 0 or self.equals(other):
2302-
return self._get_setop_name_object(other)
2303-
2304-
if len(self) == 0:
2305-
return other._get_setop_name_object(self)
2321+
is_corner_case, corner_result = self._union_corner_case(other)
2322+
if is_corner_case:
2323+
return corner_result
23062324

23072325
# TODO: is_dtype_union_equal is a hack around
23082326
# 1. buggy set ops with duplicates (GH #13432)
@@ -2398,7 +2416,7 @@ def intersection(self, other):
23982416
other = _ensure_index(other)
23992417

24002418
if self.equals(other):
2401-
return self._get_setop_name_object(other)
2419+
return self._get_reconciled_name_object(other)
24022420

24032421
if not is_dtype_equal(self.dtype, other.dtype):
24042422
this = self.astype('O')
@@ -2436,6 +2454,13 @@ def intersection(self, other):
24362454
taken.name = None
24372455
return taken
24382456

2457+
def _create_empty_index(self, name):
2458+
"""
2459+
Returns an empty index. Overridden as necessary by
2460+
subclasses that have different constructors.
2461+
"""
2462+
return self.__class__([], name=name)
2463+
24392464
def difference(self, other):
24402465
"""
24412466
Return a new Index with elements from the index that are not in
@@ -2464,7 +2489,7 @@ def difference(self, other):
24642489
self._assert_can_do_setop(other)
24652490

24662491
if self.equals(other):
2467-
return Index([], name=self.name)
2492+
return self._create_empty_index(get_op_result_name(self, other))
24682493

24692494
other, result_name = self._convert_can_do_setop(other)
24702495

pandas/core/indexes/category.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import pandas.core.base as base
2626
import pandas.core.missing as missing
2727
import pandas.core.indexes.base as ibase
28+
from pandas.core.ops import get_op_result_name
2829

2930
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
3031
_index_doc_kwargs.update(dict(target_klass='CategoricalIndex'))
@@ -300,6 +301,12 @@ def itemsize(self):
300301
# Size of the items in categories, not codes.
301302
return self.values.itemsize
302303

304+
def _wrap_setop_result(self, other, result):
305+
name = get_op_result_name(self, other)
306+
return self._simple_new(result, name=name,
307+
categories=self.categories,
308+
ordered=self.ordered)
309+
303310
def get_values(self):
304311
""" return the underlying data as an ndarray """
305312
return self._data.get_values()
@@ -716,6 +723,13 @@ def insert(self, loc, item):
716723
codes = np.concatenate((codes[:loc], code, codes[loc:]))
717724
return self._create_from_codes(codes)
718725

726+
def _create_empty_index(self, name):
727+
"""
728+
Returns an empty index using categories and ordered of self
729+
"""
730+
return CategoricalIndex([], categories=self.categories,
731+
ordered=self.ordered, name=name)
732+
719733
def _concat(self, to_concat, name):
720734
# if calling index is category, don't check dtype of others
721735
return CategoricalIndex._concat_same_dtype(self, to_concat, name)

pandas/core/indexes/datetimes.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1137,6 +1137,11 @@ def union(self, other):
11371137
y : Index or DatetimeIndex
11381138
"""
11391139
self._assert_can_do_setop(other)
1140+
1141+
is_corner_case, corner_result = self._union_corner_case(other)
1142+
if is_corner_case:
1143+
return corner_result
1144+
11401145
if not isinstance(other, DatetimeIndex):
11411146
try:
11421147
other = DatetimeIndex(other)
@@ -1354,6 +1359,10 @@ def intersection(self, other):
13541359
y : Index or DatetimeIndex
13551360
"""
13561361
self._assert_can_do_setop(other)
1362+
1363+
if self.equals(other):
1364+
return self._get_reconciled_name_object(other)
1365+
13571366
if not isinstance(other, DatetimeIndex):
13581367
try:
13591368
other = DatetimeIndex(other)

pandas/core/indexes/multi.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2733,7 +2733,7 @@ def intersection(self, other):
27332733
other_tuples = other._ndarray_values
27342734
uniq_tuples = sorted(set(self_tuples) & set(other_tuples))
27352735
if len(uniq_tuples) == 0:
2736-
return MultiIndex(levels=[[]] * self.nlevels,
2736+
return MultiIndex(levels=self.levels,
27372737
labels=[[]] * self.nlevels,
27382738
names=result_names, verify_integrity=False)
27392739
else:
@@ -2755,7 +2755,7 @@ def difference(self, other):
27552755
return self
27562756

27572757
if self.equals(other):
2758-
return MultiIndex(levels=[[]] * self.nlevels,
2758+
return MultiIndex(levels=self.levels,
27592759
labels=[[]] * self.nlevels,
27602760
names=result_names, verify_integrity=False)
27612761

pandas/core/indexes/period.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -963,6 +963,12 @@ def _convert_tolerance(self, tolerance, target):
963963
'target index size')
964964
return self._maybe_convert_timedelta(tolerance)
965965

966+
def _create_empty_index(self, name):
967+
"""
968+
Returns an empty index using freq of self
969+
"""
970+
return PeriodIndex([], freq=self.freq, name=name)
971+
966972
def insert(self, loc, item):
967973
if not isinstance(item, Period) or self.freq != item.freq:
968974
return self.astype(object).insert(loc, item)

pandas/core/indexes/range.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,8 @@ def tolist(self):
256256
@Appender(_index_shared_docs['_shallow_copy'])
257257
def _shallow_copy(self, values=None, **kwargs):
258258
if values is None:
259-
return RangeIndex(name=self.name, fastpath=True,
259+
name = kwargs.get("name", self.name)
260+
return RangeIndex(name=name, fastpath=True,
260261
**dict(self._get_data_as_items()))
261262
else:
262263
kwargs.setdefault('name', self.name)
@@ -337,6 +338,10 @@ def intersection(self, other):
337338
-------
338339
intersection : Index
339340
"""
341+
342+
if self.equals(other):
343+
return self._get_reconciled_name_object(other)
344+
340345
if not isinstance(other, RangeIndex):
341346
return super(RangeIndex, self).intersection(other)
342347

@@ -417,10 +422,10 @@ def union(self, other):
417422
union : Index
418423
"""
419424
self._assert_can_do_setop(other)
420-
if len(other) == 0 or self.equals(other):
421-
return self
422-
if len(self) == 0:
423-
return other
425+
is_corner_case, corner_result = self._union_corner_case(other)
426+
if is_corner_case:
427+
return corner_result
428+
424429
if isinstance(other, RangeIndex):
425430
start_s, step_s = self._start, self._step
426431
end_s = self._start + self._step * (len(self) - 1)
@@ -474,6 +479,12 @@ def join(self, other, how='left', level=None, return_indexers=False,
474479
def _concat_same_dtype(self, indexes, name):
475480
return _concat._concat_rangeindex_same_dtype(indexes).rename(name)
476481

482+
def _create_empty_index(self, name):
483+
"""
484+
Returns an empty index using step size of self
485+
"""
486+
return RangeIndex(start=None, stop=None, step=self._step, name=name)
487+
477488
def __len__(self):
478489
"""
479490
return the length of the RangeIndex

pandas/core/indexes/timedeltas.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,11 @@ def union(self, other):
546546
y : Index or TimedeltaIndex
547547
"""
548548
self._assert_can_do_setop(other)
549+
550+
is_corner_case, corner_result = self._union_corner_case(other)
551+
if is_corner_case:
552+
return corner_result
553+
549554
if not isinstance(other, TimedeltaIndex):
550555
try:
551556
other = TimedeltaIndex(other)
@@ -638,10 +643,6 @@ def _fast_union(self, other):
638643
else:
639644
return left
640645

641-
def _wrap_setop_result(self, other, result):
642-
name = get_op_result_name(self, other)
643-
return self._simple_new(result, name=name, freq=None)
644-
645646
def intersection(self, other):
646647
"""
647648
Specialized intersection for TimedeltaIndex objects. May be much faster
@@ -656,6 +657,10 @@ def intersection(self, other):
656657
y : Index or TimedeltaIndex
657658
"""
658659
self._assert_can_do_setop(other)
660+
661+
if self.equals(other):
662+
return self._get_reconciled_name_object(other)
663+
659664
if not isinstance(other, TimedeltaIndex):
660665
try:
661666
other = TimedeltaIndex(other)

0 commit comments

Comments
 (0)