Skip to content

Commit 12bcfed

Browse files
committed
Generalize NA Compat
1 parent 2c4bf16 commit 12bcfed

File tree

14 files changed

+111
-38
lines changed

14 files changed

+111
-38
lines changed

doc/source/whatsnew/v0.22.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ Other API Changes
7777
- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
7878
- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`)
7979
- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`)
80+
- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)
8081

8182

8283
.. _whatsnew_0220.deprecations:

pandas/core/indexes/base.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3734,6 +3734,10 @@ def insert(self, loc, item):
37343734
-------
37353735
new_index : Index
37363736
"""
3737+
if lib.checknull(item):
3738+
# GH 18295
3739+
item = self._na_value
3740+
37373741
_self = np.asarray(self)
37383742
item = self._coerce_scalar_to_index(item)._values
37393743
idx = np.concatenate((_self[:loc], item, _self[loc:]))

pandas/core/indexes/category.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import numpy as np
2-
from pandas._libs import index as libindex
2+
from pandas._libs import index as libindex, lib
33

44
from pandas import compat
55
from pandas.compat.numpy import function as nv
@@ -690,7 +690,7 @@ def insert(self, loc, item):
690690
691691
"""
692692
code = self.categories.get_indexer([item])
693-
if (code == -1):
693+
if (code == -1) and not lib.checknull(item):
694694
raise TypeError("cannot insert an item into a CategoricalIndex "
695695
"that is not already an existing category")
696696

pandas/core/indexes/datetimes.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1757,6 +1757,9 @@ def insert(self, loc, item):
17571757
-------
17581758
new_index : Index
17591759
"""
1760+
if lib.checknull(item):
1761+
# GH 18295
1762+
item = self._na_value
17601763

17611764
freq = None
17621765

@@ -1773,14 +1776,14 @@ def insert(self, loc, item):
17731776
elif (loc == len(self)) and item - self.freq == self[-1]:
17741777
freq = self.freq
17751778
item = _to_m8(item, tz=self.tz)
1779+
17761780
try:
17771781
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
17781782
self[loc:].asi8))
17791783
if self.tz is not None:
17801784
new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz)
17811785
return DatetimeIndex(new_dates, name=self.name, freq=freq,
17821786
tz=self.tz)
1783-
17841787
except (AttributeError, TypeError):
17851788

17861789
# fall back to object index

pandas/core/indexes/interval.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
Index, _ensure_index,
2424
default_pprint, _index_shared_docs)
2525

26-
from pandas._libs import Timestamp, Timedelta
26+
from pandas._libs import lib, Timestamp, Timedelta
2727
from pandas._libs.interval import (
2828
Interval, IntervalMixin, IntervalTree,
2929
intervals_to_interval_bounds)
@@ -1007,12 +1007,8 @@ def insert(self, loc, item):
10071007
'side as the index')
10081008
left_insert = item.left
10091009
right_insert = item.right
1010-
elif is_scalar(item) and isna(item):
1010+
elif lib.checknull(item):
10111011
# GH 18295
1012-
if item is not self.left._na_value:
1013-
raise TypeError('cannot insert with incompatible NA value: '
1014-
'got {item}, expected {na}'
1015-
.format(item=item, na=self.left._na_value))
10161012
left_insert = right_insert = item
10171013
else:
10181014
raise ValueError('can only insert Interval objects and NA into '

pandas/core/indexes/timedeltas.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -852,16 +852,18 @@ def insert(self, loc, item):
852852
-------
853853
new_index : Index
854854
"""
855-
856855
# try to convert if possible
857856
if _is_convertible_to_td(item):
858857
try:
859858
item = Timedelta(item)
860859
except Exception:
861860
pass
861+
elif lib.checknull(item):
862+
# GH 18295
863+
item = self._na_value
862864

863865
freq = None
864-
if isinstance(item, Timedelta) or item is NaT:
866+
if isinstance(item, Timedelta) or (item is self._na_value):
865867

866868
# check freq can be preserved on edge cases
867869
if self.freq is not None:

pandas/tests/indexes/datetimes/test_indexing.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,13 @@ def test_insert(self):
145145
assert result.tz == expected.tz
146146
assert result.freq is None
147147

148+
# GH 18295 (test missing)
149+
expected = DatetimeIndex(
150+
['20170101', pd.NaT, '20170102', '20170103', '20170104'])
151+
for na in (np.nan, pd.NaT, None):
152+
result = date_range('20170101', periods=4).insert(1, na)
153+
tm.assert_index_equal(result, expected)
154+
148155
def test_delete(self):
149156
idx = date_range(start='2000-01-01', periods=5, freq='M', name='idx')
150157

pandas/tests/indexes/period/test_period.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -697,3 +697,11 @@ def test_join_self(self, how):
697697
index = period_range('1/1/2000', periods=10)
698698
joined = index.join(index, how=how)
699699
assert index is joined
700+
701+
def test_insert(self):
702+
# GH 18295 (test missing)
703+
expected = PeriodIndex(
704+
['2017Q1', pd.NaT, '2017Q2', '2017Q3', '2017Q4'], freq='Q')
705+
for na in (np.nan, pd.NaT, None):
706+
result = period_range('2017Q1', periods=4, freq='Q').insert(1, na)
707+
tm.assert_index_equal(result, expected)

pandas/tests/indexes/test_base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,12 @@ def test_insert(self):
457457
null_index = Index([])
458458
tm.assert_index_equal(Index(['a']), null_index.insert(0, 'a'))
459459

460+
# GH 18295 (test missing)
461+
expected = Index(['a', np.nan, 'b', 'c'])
462+
for na in (np.nan, pd.NaT, None):
463+
result = Index(list('abc')).insert(1, na)
464+
tm.assert_index_equal(result, expected)
465+
460466
def test_delete(self):
461467
idx = Index(['a', 'b', 'c', 'd'], name='idx')
462468

pandas/tests/indexes/test_category.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,12 @@ def test_insert(self):
344344
# invalid
345345
pytest.raises(TypeError, lambda: ci.insert(0, 'd'))
346346

347+
# GH 18295 (test missing)
348+
expected = CategoricalIndex(['a', np.nan, 'a', 'b', 'c', 'b'])
349+
for na in (np.nan, pd.NaT, None):
350+
result = CategoricalIndex(list('aabcb')).insert(1, na)
351+
tm.assert_index_equal(result, expected)
352+
347353
def test_delete(self):
348354

349355
ci = self.create_index()

pandas/tests/indexes/test_interval.py

Lines changed: 39 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -366,38 +366,50 @@ def test_delete(self, closed):
366366
result = self.create_index(closed=closed).delete(0)
367367
tm.assert_index_equal(result, expected)
368368

369-
def test_insert(self):
370-
expected = IntervalIndex.from_breaks(range(4))
371-
actual = self.index.insert(2, Interval(2, 3))
372-
assert expected.equals(actual)
373-
374-
pytest.raises(ValueError, self.index.insert, 0, 1)
375-
pytest.raises(ValueError, self.index.insert, 0,
376-
Interval(2, 3, closed='left'))
377-
378369
@pytest.mark.parametrize('data', [
379-
interval_range(0, periods=10),
380-
interval_range(1.7, periods=8, freq=2.5),
381-
interval_range(Timestamp('20170101'), periods=12),
382-
interval_range(Timedelta('1 day'), periods=6),
370+
interval_range(0, periods=10, closed='neither'),
371+
interval_range(1.7, periods=8, freq=2.5, closed='both'),
372+
interval_range(Timestamp('20170101'), periods=12, closed='left'),
373+
interval_range(Timedelta('1 day'), periods=6, closed='right'),
383374
IntervalIndex.from_tuples([('a', 'd'), ('e', 'j'), ('w', 'z')]),
384375
IntervalIndex.from_tuples([(1, 2), ('a', 'z'), (3.14, 6.28)])])
385-
def test_insert_na(self, data):
386-
# GH 18295
387-
valid_na, invalid_na = np.nan, pd.NaT
388-
if data.left._na_value is pd.NaT:
389-
valid_na, invalid_na = invalid_na, valid_na
390-
391-
# valid insertion
392-
expected = IntervalIndex([data[0], np.nan]).append(data[1:])
393-
result = data.insert(1, valid_na)
376+
def test_insert(self, data):
377+
item = data[0]
378+
idx_item = IntervalIndex([item], closed=data.closed)
379+
380+
# start
381+
expected = idx_item.append(data)
382+
result = data.insert(0, item)
394383
tm.assert_index_equal(result, expected)
395384

396-
# invalid insertion
397-
msg = ('cannot insert with incompatible NA value: got {invalid}, '
398-
'expected {valid}').format(invalid=invalid_na, valid=valid_na)
399-
with tm.assert_raises_regex(TypeError, msg):
400-
data.insert(1, invalid_na)
385+
# end
386+
expected = data.append(idx_item)
387+
result = data.insert(len(data), item)
388+
tm.assert_index_equal(result, expected)
389+
390+
# mid
391+
expected = data[:3].append(idx_item).append(data[3:])
392+
result = data.insert(3, item)
393+
tm.assert_index_equal(result, expected)
394+
395+
# invalid type
396+
msg = 'can only insert Interval objects and NA into an IntervalIndex'
397+
with tm.assert_raises_regex(ValueError, msg):
398+
data.insert(1, 'foo')
399+
400+
# invalid closed
401+
msg = 'inserted item must be closed on the same side as the index'
402+
for closed in {'left', 'right', 'both', 'neither'} - {item.closed}:
403+
with tm.assert_raises_regex(ValueError, msg):
404+
bad_item = Interval(item.left, item.right, closed=closed)
405+
data.insert(1, bad_item)
406+
407+
# GH 18295 (test missing)
408+
na_idx = IntervalIndex([np.nan], closed=data.closed)
409+
for na in (np.nan, pd.NaT, None):
410+
expected = data[:1].append(na_idx).append(data[1:])
411+
result = data.insert(1, na)
412+
tm.assert_index_equal(result, expected)
401413

402414
def test_take(self, closed):
403415
index = self.create_index(closed=closed)

pandas/tests/indexes/test_numeric.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ def test_modulo(self):
175175
expected = Index(index.values % 2)
176176
tm.assert_index_equal(index % 2, expected)
177177

178+

178179
@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
179180
def test_where(self, klass):
180181
i = self.create_index()
@@ -186,6 +187,14 @@ def test_where(self, klass):
186187
expected = Float64Index([i._na_value] + i[1:].tolist())
187188
result = i.where(klass(cond))
188189
tm.assert_index_equal(result, expected)
190+

191+
def test_insert(self):
192+
# GH 18295 (test missing)
193+
expected = Float64Index([0, np.nan, 1, 2, 3, 4])
194+
for na in (np.nan, pd.NaT, None):
195+
result = self.create_index().insert(1, na)
196+
tm.assert_index_equal(result, expected)
197+

189198

190199

191200
class TestFloat64Index(Numeric):
@@ -1193,3 +1202,10 @@ def test_join_outer(self):
11931202
tm.assert_index_equal(res, eres)
11941203
tm.assert_numpy_array_equal(lidx, elidx)
11951204
tm.assert_numpy_array_equal(ridx, eridx)
1205+
1206+
def test_insert(self):
1207+
# GH 18295 (test missing)
1208+
expected = UInt64Index([0, 0, 1, 2, 3, 4])
1209+
for na in (np.nan, pd.NaT, None):
1210+
result = self.create_index().insert(1, na)
1211+
tm.assert_index_equal(result, expected)

pandas/tests/indexes/test_range.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,12 @@ def test_insert(self):
295295
# test 0th element
296296
tm.assert_index_equal(idx[0:4], result.insert(0, idx[0]))
297297

298+
# GH 18295 (test missing)
299+
expected = Float64Index([0, np.nan, 1, 2, 3, 4])
300+
for na in (np.nan, pd.NaT, None):
301+
result = RangeIndex(5).insert(1, na)
302+
tm.assert_index_equal(result, expected)
303+
298304
def test_delete(self):
299305

300306
idx = RangeIndex(5, name='Foo')

pandas/tests/indexes/timedeltas/test_indexing.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ def test_insert(self):
5757
assert result.name == expected.name
5858
assert result.freq == expected.freq
5959

60+
# GH 18295 (test missing)
61+
expected = TimedeltaIndex(['1day', pd.NaT, '2day', '3day'])
62+
for na in (np.nan, pd.NaT, None):
63+
result = timedelta_range('1day', '3day').insert(1, na)
64+
tm.assert_index_equal(result, expected)
65+
6066
def test_delete(self):
6167
idx = timedelta_range(start='1 Days', periods=5, freq='D', name='idx')
6268

0 commit comments

Comments
 (0)