Skip to content

Commit d4227c6

Browse files
author
Jiang Yue
committed
refactor validate codes and add in doc-strings
1 parent 933615b commit d4227c6

File tree

2 files changed

+41
-20
lines changed

2 files changed

+41
-20
lines changed

pandas/core/indexes/multi.py

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -242,14 +242,30 @@ def __new__(cls, levels=None, codes=None, sortorder=None, names=None,
242242
result.sortorder = sortorder
243243

244244
if verify_integrity:
245-
result._verify_integrity()
245+
new_codes = result._verify_integrity()
246+
result._codes = new_codes
246247

247248
if _set_identity:
248249
result._reset_identity()
249250

250251
return result
251252

252-
def _validate_codes(cls, level, code):
253+
def _validate_codes(self, level, code):
254+
"""
255+
Reassign code values as -1 if their corresponding levels are NaN.
256+
257+
Parameters
258+
----------
259+
code : list
260+
Code to reassign.
261+
level : list
262+
Level to check for NaN.
263+
264+
Returns
265+
-------
266+
code : new code where code value = -1 if it corresponds
267+
to a NaN level.
268+
"""
253269
null_mask = isna(level)
254270
if np.any(null_mask):
255271
code = np.where(null_mask[code], -1, code)
@@ -270,6 +286,11 @@ def _verify_integrity(self, codes=None, levels=None):
270286
ValueError
271287
If length of levels and codes don't match, if the codes for any
272288
level would exceed level bounds, or there are any duplicate levels.
289+
290+
Returns
291+
-------
292+
codes : new codes where code value = -1 if it corresponds to a
293+
NaN level.
273294
"""
274295
# NOTE: Currently does not check, among other things, that cached
275296
# nlevels matches nor that sortorder matches actually sortorder.
@@ -279,18 +300,18 @@ def _verify_integrity(self, codes=None, levels=None):
279300
if len(levels) != len(codes):
280301
raise ValueError("Length of levels and codes must match. NOTE:"
281302
" this index is in an inconsistent state.")
282-
codes_length = len(self.codes[0])
303+
codes_length = len(codes[0])
283304
for i, (level, level_codes) in enumerate(zip(levels, codes)):
284305
if len(level_codes) != codes_length:
285306
raise ValueError("Unequal code lengths: %s" %
286307
([len(code_) for code_ in codes]))
287308
if len(level_codes) and level_codes.max() >= len(level):
288-
raise ValueError("On level {level}, code max ({max_code})"
289-
" >= length of level ({level_len}). "
290-
"NOTE: this index is in an inconsistent"
291-
" state".format(
292-
level=i, max_code=level_codes.max(),
293-
level_len=len(level)))
309+
msg = ("On level {level}, code max ({max_code}) >= length of "
310+
"level ({level_len}). NOTE: this index is in an "
311+
"inconsistent state".format(
312+
level=i, max_code=level_codes.max(),
313+
level_len=len(level)))
314+
raise ValueError(msg)
294315
if len(level_codes) and level_codes.min() < -1:
295316
raise ValueError("On level {level}, code value ({code})"
296317
" < -1".format(
@@ -301,9 +322,9 @@ def _verify_integrity(self, codes=None, levels=None):
301322
values=[value for value in level],
302323
level=i))
303324

304-
codes = [self._validate_codes(level, code)
305-
for level, code in zip(levels, codes)]
306-
self._set_codes(codes, validate=False)
325+
codes = FrozenList([self._validate_codes(level, code)
326+
for level, code in zip(levels, codes)])
327+
return codes
307328

308329
@classmethod
309330
def from_arrays(cls, arrays, sortorder=None, names=None):
@@ -603,7 +624,8 @@ def _set_levels(self, levels, level=None, copy=False, validate=True,
603624
new_levels = FrozenList(new_levels)
604625

605626
if verify_integrity:
606-
self._verify_integrity(levels=new_levels)
627+
new_codes = self._verify_integrity(levels=new_levels)
628+
self._codes = new_codes
607629

608630
names = self.names
609631
self._levels = new_levels
@@ -711,10 +733,8 @@ def _set_codes(self, codes, level=None, copy=False, validate=True,
711733
level_codes, lev, copy=copy)._shallow_copy()
712734
new_codes = FrozenList(new_codes)
713735

714-
if verify_integrity:
715-
self._verify_integrity(codes=new_codes)
716-
else:
717-
self._codes = new_codes
736+
new_codes = self._verify_integrity(codes=new_codes)
737+
self._codes = new_codes
718738

719739
self._tuples = None
720740
self._reset_cache()
@@ -1778,9 +1798,10 @@ def __setstate__(self, state):
17781798

17791799
self._set_levels([Index(x) for x in levels], validate=False)
17801800
self._set_codes(codes)
1801+
new_codes = self._verify_integrity()
1802+
self._set_codes(new_codes)
17811803
self._set_names(names)
17821804
self.sortorder = sortorder
1783-
self._verify_integrity()
17841805
self._reset_identity()
17851806

17861807
def __getitem__(self, key):

pandas/tests/indexes/multi/test_constructor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,10 @@ def test_constructor_mismatched_codes_levels(idx):
6464
with pytest.raises(ValueError, match=msg):
6565
MultiIndex(levels=levels, codes=codes)
6666

67-
length_error = (r"On level 0, code max \(3\) >= length of level \(1\)\."
67+
length_error = (r"On level 0, code max \(3\) >= length of level \(1\)\."
6868
" NOTE: this index is in an inconsistent state")
6969
label_error = r"Unequal code lengths: \[4, 2\]"
70-
code_value_error = (r"On level 0, code value \(-2\) < -1")
70+
code_value_error = r"On level 0, code value \(-2\) < -1"
7171

7272
# important to check that it's looking at the right thing.
7373
with pytest.raises(ValueError, match=length_error):

0 commit comments

Comments
 (0)