Skip to content

Commit ee9fbb6

Browse files
committed
Merge pull request #12109 from kawochen/ENH-12034-union
ENH: GH12034 RangeIndex.union with RangeIndex returns RangeIndex if possible
2 parents f5550e2 + aad37a5 commit ee9fbb6

File tree

3 files changed

+76
-2
lines changed

3 files changed

+76
-2
lines changed

doc/source/whatsnew/v0.18.0.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ Range Index
110110

111111
A ``RangeIndex`` has been added to the ``Int64Index`` sub-classes to support a memory saving alternative for common use cases. This has a similar implementation to the Python ``range`` object (``xrange`` in Python 2), in that it only stores the start, stop, and step values for the index. It will transparently interact with the user API, converting to ``Int64Index`` if needed.
112112

113-
This will now be the default constructed index for ``NDFrame`` objects, rather than previous an ``Int64Index``. (:issue:`939`, :issue:`12070`, :issue:`12071`)
113+
This will now be the default constructed index for ``NDFrame`` objects, rather than the previous ``Int64Index``. (:issue:`939`, :issue:`12070`, :issue:`12071`, :issue:`12109`)
114114

115115
Previous Behavior:
116116

pandas/core/index.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4307,7 +4307,48 @@ def union(self, other):
43074307
-------
43084308
union : Index
43094309
"""
4310-
# note: could return a RangeIndex in some circumstances
4310+
self._assert_can_do_setop(other)
4311+
if len(other) == 0 or self.equals(other):
4312+
return self
4313+
if len(self) == 0:
4314+
return other
4315+
if isinstance(other, RangeIndex):
4316+
start_s, step_s = self._start, self._step
4317+
end_s = self._start + self._step * (len(self) - 1)
4318+
start_o, step_o = other._start, other._step
4319+
end_o = other._start + other._step * (len(other) - 1)
4320+
if self._step < 0:
4321+
start_s, step_s, end_s = end_s, -step_s, start_s
4322+
if other._step < 0:
4323+
start_o, step_o, end_o = end_o, -step_o, start_o
4324+
if len(self) == 1 and len(other) == 1:
4325+
step_s = step_o = abs(self._start - other._start)
4326+
elif len(self) == 1:
4327+
step_s = step_o
4328+
elif len(other) == 1:
4329+
step_o = step_s
4330+
start_r = min(start_s, start_o)
4331+
end_r = max(end_s, end_o)
4332+
if step_o == step_s:
4333+
if ((start_s - start_o) % step_s == 0 and
4334+
(start_s - end_o) <= step_s and
4335+
(start_o - end_s) <= step_s):
4336+
return RangeIndex(start_r, end_r + step_s, step_s)
4337+
if ((step_s % 2 == 0) and
4338+
(abs(start_s - start_o) <= step_s / 2) and
4339+
(abs(end_s - end_o) <= step_s / 2)):
4340+
return RangeIndex(start_r, end_r + step_s / 2, step_s / 2)
4341+
elif step_o % step_s == 0:
4342+
if ((start_o - start_s) % step_s == 0 and
4343+
(start_o + step_s >= start_s) and
4344+
(end_o - step_s <= end_s)):
4345+
return RangeIndex(start_r, end_r + step_s, step_s)
4346+
elif step_s % step_o == 0:
4347+
if ((start_s - start_o) % step_o == 0 and
4348+
(start_s + step_o >= start_o) and
4349+
(end_s - step_o <= end_o)):
4350+
return RangeIndex(start_r, end_r + step_o, step_o)
4351+
43114352
return self._int64index.union(other)
43124353

43134354
def join(self, other, how='left', level=None, return_indexers=False):

pandas/tests/test_index.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4130,6 +4130,39 @@ def test_union_noncomparable(self):
41304130
expected = np.concatenate((other, self.index))
41314131
self.assert_numpy_array_equal(result, expected)
41324132

4133+
def test_union(self):
    """Exercise ``union`` on ``RangeIndex`` over a table of operand pairs.

    Every table entry is ``(left, right, expected)``. For each entry the
    union is taken in both operand orders and each result must match
    ``expected`` with ``exact=True``. The union computed through
    ``left._int64index`` must also compare equal to ``expected`` under
    the default ``assert_index_equal`` settings.
    """
    RI, I64 = RangeIndex, Int64Index

    scenarios = [
        (RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1)),
        (RI(0, 10, 1), RI(5, 20, 1), RI(0, 20, 1)),
        (RI(0, 10, 1), RI(10, 20, 1), RI(0, 20, 1)),
        (RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1)),
        (RI(0, -10, -1), RI(-10, -20, -1), RI(-19, 1, 1)),
        (RI(0, 10, 2), RI(1, 10, 2), RI(0, 10, 1)),
        (RI(0, 11, 2), RI(1, 12, 2), RI(0, 12, 1)),
        (RI(0, 21, 4), RI(-2, 24, 4), RI(-2, 24, 2)),
        (RI(0, -20, -2), RI(-1, -21, -2), RI(-19, 1, 1)),
        (RI(0, 100, 5), RI(0, 100, 20), RI(0, 100, 5)),
        (RI(0, -100, -5), RI(5, -100, -20), RI(-95, 10, 5)),
        (RI(0, -11, -1), RI(1, -12, -4), RI(-11, 2, 1)),
        (RI(), RI(), RI()),
        (RI(0, -10, -2), RI(), RI(0, -10, -2)),
        (RI(0, 100, 2), RI(100, 150, 200), RI(0, 102, 2)),
        (RI(0, -100, -2), RI(-100, 50, 102), RI(-100, 4, 2)),
        (RI(0, -100, -1), RI(0, -50, -3), RI(-99, 1, 1)),
        (RI(0, 1, 1), RI(5, 6, 10), RI(0, 6, 5)),
        (RI(0, 10, 5), RI(-5, -6, -20), RI(-5, 10, 5)),
        # expected result here is an Int64Index, not a RangeIndex
        (RI(0, 3, 1), RI(4, 5, 1), I64([0, 1, 2, 4])),
        # one operand is an Int64Index
        (RI(0, 10, 1), I64([]), RI(0, 10, 1)),
        (RI(), I64([1, 5, 6]), I64([1, 5, 6])),
    ]

    for left, right, wanted in scenarios:
        # Compute all three unions first, then assert, so that any
        # exception surfaces at the same point as before.
        forward = left.union(right)
        backward = right.union(left)
        via_int64 = left._int64index.union(right)

        tm.assert_index_equal(forward, wanted, exact=True)
        tm.assert_index_equal(backward, wanted, exact=True)
        tm.assert_index_equal(via_int64, wanted)
4165+
41334166
def test_nbytes(self):
41344167

41354168
# memory savings vs int index

0 commit comments

Comments
 (0)