Skip to content

Commit 041b4cd

Browse files
author
Jiang Yue
committed
Refactor code validation and add tests for set_codes and set_levels
1 parent b6d29bd commit 041b4cd

File tree

3 files changed

+41
-26
lines changed

3 files changed

+41
-26
lines changed

doc/source/whatsnew/v0.25.0.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -387,8 +387,7 @@ MultiIndex
387387
^^^^^^^^^^
388388

389389
- Bug in which incorrect exception raised by :class:`Timedelta` when testing the membership of :class:`MultiIndex` (:issue:`24570`)
390-
- Bug in :class:`Multindex` construction from levels and codes that would incorrectly allows code values < -1
391-
- Bug in :class:`Multindex` construction from levels and codes that would incorrectly allows NaN levels (:issue:`19387`)
390+
- Bug in :class:`MultiIndex` construction from levels and codes that would incorrectly allow code values < -1 or NaN levels (:issue:`19387`)
392391

393392
I/O
394393
^^^

pandas/core/indexes/multi.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -244,17 +244,12 @@ def __new__(cls, levels=None, codes=None, sortorder=None, names=None,
244244
if verify_integrity:
245245
result._verify_integrity()
246246

247-
codes = [cls._reassign_na_codes(level, code)
248-
for level, code in zip(levels, codes)]
249-
result._set_codes(codes, validate=False)
250-
251247
if _set_identity:
252248
result._reset_identity()
253249

254250
return result
255251

256-
@classmethod
257-
def _reassign_na_codes(cls, level, code):
252+
def _validate_codes(cls, level, code):
258253
null_mask = isna(level)
259254
if np.any(null_mask):
260255
code = np.where(null_mask[code], -1, code)
@@ -290,19 +285,26 @@ def _verify_integrity(self, codes=None, levels=None):
290285
raise ValueError("Unequal code lengths: %s" %
291286
([len(code_) for code_ in codes]))
292287
if len(level_codes) and level_codes.max() >= len(level):
293-
raise ValueError("On level %d, code max (%d) >= length of"
294-
" level (%d). NOTE: this index is in an"
295-
" inconsistent state" % (i, level_codes.max(),
296-
len(level)))
288+
raise ValueError("On level {level}, code max ({max_code})"
289+
" >= length of level ({level_len}). "
290+
"NOTE: this index is in an inconsistent"
291+
" state".format(
292+
level=i, max_code=level_codes.max(),
293+
level_len=len(level)))
297294
if len(level_codes) and level_codes.min() < -1:
298-
raise ValueError("On level %d, code value (%d) < -1" %
299-
(i, level_codes.min()))
295+
raise ValueError("On level {level}, code value ({code})"
296+
" < -1".format(
297+
level=i, code=level_codes.min()))
300298
if not level.is_unique:
301299
raise ValueError("Level values must be unique: {values} on "
302300
"level {level}".format(
303301
values=[value for value in level],
304302
level=i))
305303

304+
codes = [self._validate_codes(level, code)
305+
for level, code in zip(levels, codes)]
306+
self._set_codes(codes, validate=False)
307+
306308
@classmethod
307309
def from_arrays(cls, arrays, sortorder=None, names=None):
308310
"""
@@ -691,7 +693,6 @@ def labels(self):
691693

692694
def _set_codes(self, codes, level=None, copy=False, validate=True,
693695
verify_integrity=False):
694-
695696
if validate and level is None and len(codes) != self.nlevels:
696697
raise ValueError("Length of codes must match number of levels")
697698
if validate and level is not None and len(codes) != len(level):
@@ -712,8 +713,9 @@ def _set_codes(self, codes, level=None, copy=False, validate=True,
712713

713714
if verify_integrity:
714715
self._verify_integrity(codes=new_codes)
716+
else:
717+
self._codes = new_codes
715718

716-
self._codes = new_codes
717719
self._tuples = None
718720
self._reset_cache()
719721

pandas/tests/indexes/multi/test_constructor.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -90,16 +90,30 @@ def test_constructor_mismatched_codes_levels(idx):
9090

9191

9292
def test_na_levels():
93-
tm.assert_index_equal(
94-
MultiIndex(levels=[[np.nan, None, pd.NaT, 128, 2]],
95-
codes=[[0, -1, 1, 2, 3, 4]]),
96-
MultiIndex(levels=[[np.nan, None, pd.NaT, 128, 2]],
97-
codes=[[-1, -1, -1, -1, 3, 4]]))
98-
tm.assert_index_equal(
99-
MultiIndex(levels=[[np.nan, 's', pd.NaT, 128, None]],
100-
codes=[[0, -1, 1, 2, 3, 4]]),
101-
MultiIndex(levels=[[np.nan, 's', pd.NaT, 128, None]],
102-
codes=[[-1, -1, 1, -1, 3, -1]]))
93+
# GH26408
94+
result = MultiIndex(levels=[[np.nan, None, pd.NaT, 128, 2]],
95+
codes=[[0, -1, 1, 2, 3, 4]])
96+
expected = MultiIndex(levels=[[np.nan, None, pd.NaT, 128, 2]],
97+
codes=[[-1, -1, -1, -1, 3, 4]])
98+
tm.assert_index_equal(result, expected)
99+
100+
result = MultiIndex(levels=[[np.nan, 's', pd.NaT, 128, None]],
101+
codes=[[0, -1, 1, 2, 3, 4]])
102+
expected = MultiIndex(levels=[[np.nan, 's', pd.NaT, 128, None]],
103+
codes=[[-1, -1, 1, -1, 3, -1]])
104+
tm.assert_index_equal(result, expected)
105+
106+
# verify set_levels and set_codes
107+
result = MultiIndex(
108+
levels=[[1, 2, 3, 4, 5]], codes=[[0, -1, 1, 2, 3, 4]]).set_levels(
109+
[[np.nan, 's', pd.NaT, 128, None]])
110+
tm.assert_index_equal(result, expected)
111+
112+
result = MultiIndex(
113+
levels=[[np.nan, 's', pd.NaT, 128, None]],
114+
codes=[[1, 2, 2, 2, 2, 2]]).set_codes(
115+
[[0, -1, 1, 2, 3, 4]])
116+
tm.assert_index_equal(result, expected)
103117

104118

105119
def test_labels_deprecated(idx):

0 commit comments

Comments
 (0)