Skip to content

Commit d0523e9

Browse files
committed
rename labels -> codes in core/groupby/
1 parent 3992126 commit d0523e9

File tree

6 files changed

+61
-63
lines changed

6 files changed

+61
-63
lines changed

pandas/core/groupby/generic.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -654,16 +654,16 @@ def value_counts(
654654
rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))
655655

656656
# multi-index components
657-
labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)]
657+
codes = list(map(rep, self.grouper.recons_codes)) + [llab(lab, inc)]
658658
levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
659659
names = self.grouper.names + [self._selection_name]
660660

661661
if dropna:
662-
mask = labels[-1] != -1
662+
mask = codes[-1] != -1
663663
if mask.all():
664664
dropna = False
665665
else:
666-
out, labels = out[mask], [label[mask] for label in labels]
666+
out, codes = out[mask], [level_codes[mask] for level_codes in codes]
667667

668668
if normalize:
669669
out = out.astype("float")
@@ -679,11 +679,11 @@ def value_counts(
679679
if sort and bins is None:
680680
cat = ids[inc][mask] if dropna else ids[inc]
681681
sorter = np.lexsort((out if ascending else -out, cat))
682-
out, labels[-1] = out[sorter], labels[-1][sorter]
682+
out, codes[-1] = out[sorter], codes[-1][sorter]
683683

684684
if bins is None:
685685
mi = MultiIndex(
686-
levels=levels, codes=labels, names=names, verify_integrity=False
686+
levels=levels, codes=codes, names=names, verify_integrity=False
687687
)
688688

689689
if is_integer_dtype(out):
@@ -693,14 +693,14 @@ def value_counts(
693693
# for compat. with libgroupby.value_counts need to ensure every
694694
# bin is present at every index level, null filled with zeros
695695
diff = np.zeros(len(out), dtype="bool")
696-
for lab in labels[:-1]:
697-
diff |= np.r_[True, lab[1:] != lab[:-1]]
696+
for codes_ in codes[:-1]:
697+
diff |= np.r_[True, codes_[1:] != codes_[:-1]]
698698

699699
ncat, nbin = diff.sum(), len(levels[-1])
700700

701701
left = [np.repeat(np.arange(ncat), nbin), np.tile(np.arange(nbin), ncat)]
702702

703-
right = [diff.cumsum() - 1, labels[-1]]
703+
right = [diff.cumsum() - 1, codes[-1]]
704704

705705
_, idx = _get_join_indexers(left, right, sort=False, how="left")
706706
out = np.where(idx != -1, out[idx], 0)
@@ -710,7 +710,7 @@ def value_counts(
710710
out, left[-1] = out[sorter], left[-1][sorter]
711711

712712
# build the multi-index w/ full levels
713-
codes = list(map(lambda lab: np.repeat(lab[diff], nbin), labels[:-1]))
713+
codes = list(map(lambda codes: np.repeat(codes[diff], nbin), codes[:-1]))
714714
codes.append(left[-1])
715715

716716
mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False)
@@ -757,7 +757,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None):
757757
)
758758
)
759759
filled = getattr(self, fill_method)(limit=limit)
760-
fill_grp = filled.groupby(self.grouper.labels)
760+
fill_grp = filled.groupby(self.grouper.codes)
761761
shifted = fill_grp.shift(periods=periods, freq=freq)
762762

763763
return (filled / shifted) - 1

pandas/core/groupby/groupby.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2337,7 +2337,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, axis=0
23372337
)
23382338
)
23392339
filled = getattr(self, fill_method)(limit=limit)
2340-
fill_grp = filled.groupby(self.grouper.labels)
2340+
fill_grp = filled.groupby(self.grouper.codes)
23412341
shifted = fill_grp.shift(periods=periods, freq=freq)
23422342
return (filled / shifted) - 1
23432343

pandas/core/groupby/grouper.py

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class Grouper:
5959
<http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_.
6060
axis : number/name of the axis, defaults to 0
6161
sort : bool, default to False
62-
whether to sort the resulting labels
62+
whether to sort the resulting codes
6363
closed : {'left' or 'right'}
6464
Closed end of interval. Only when `freq` parameter is passed.
6565
label : {'left' or 'right'}
@@ -231,7 +231,7 @@ class Grouping:
231231
obj :
232232
name :
233233
level :
234-
observed : boolean, default False
234+
observed : bool, default False
235235
If we are a Categorical, use the observed values
236236
in_axis : if the Grouping is a column in self.obj and hence among
237237
Groupby.exclusions list
@@ -240,9 +240,7 @@ class Grouping:
240240
-------
241241
**Attributes**:
242242
* indices : dict of {group -> index_list}
243-
* labels : ndarray, group labels
244-
* ids : mapping of label -> group
245-
* counts : array of group counts
243+
* codes : ndarray, group codes
246244
* group_index : unique groups
247245
* groups : dict of {group -> label_list}
248246
"""
@@ -290,12 +288,12 @@ def __init__(
290288
if self.name is None:
291289
self.name = index.names[level]
292290

293-
self.grouper, self._labels, self._group_index = index._get_grouper_for_level( # noqa: E501
291+
self.grouper, self._codes, self._group_index = index._get_grouper_for_level( # noqa: E501
294292
self.grouper, level
295293
)
296294

297295
# a passed Grouper like, directly get the grouper in the same way
298-
# as single grouper groupby, use the group_info to get labels
296+
# as single grouper groupby, use the group_info to get codes
299297
elif isinstance(self.grouper, Grouper):
300298
# get the new grouper; we already have disambiguated
301299
# what key/level refer to exactly, don't need to
@@ -324,7 +322,7 @@ def __init__(
324322

325323
# we make a CategoricalIndex out of the cat grouper
326324
# preserving the categories / ordered attributes
327-
self._labels = self.grouper.codes
325+
self._codes = self.grouper.codes
328326
if observed:
329327
codes = algorithms.unique1d(self.grouper.codes)
330328
codes = codes[codes != -1]
@@ -380,7 +378,7 @@ def __repr__(self):
380378
def __iter__(self):
381379
return iter(self.indices)
382380

383-
_labels = None
381+
_codes = None
384382
_group_index = None
385383

386384
@property
@@ -397,10 +395,10 @@ def indices(self):
397395
return values._reverse_indexer()
398396

399397
@property
400-
def labels(self):
401-
if self._labels is None:
402-
self._make_labels()
403-
return self._labels
398+
def codes(self):
399+
if self._codes is None:
400+
self._make_codes()
401+
return self._codes
404402

405403
@cache_readonly
406404
def result_index(self):
@@ -411,24 +409,24 @@ def result_index(self):
411409
@property
412410
def group_index(self):
413411
if self._group_index is None:
414-
self._make_labels()
412+
self._make_codes()
415413
return self._group_index
416414

417-
def _make_labels(self):
418-
if self._labels is None or self._group_index is None:
415+
def _make_codes(self):
416+
if self._codes is None or self._group_index is None:
419417
# we have a list of groupers
420418
if isinstance(self.grouper, BaseGrouper):
421-
labels = self.grouper.label_info
419+
codes = self.grouper.codes_info
422420
uniques = self.grouper.result_index
423421
else:
424-
labels, uniques = algorithms.factorize(self.grouper, sort=self.sort)
422+
codes, uniques = algorithms.factorize(self.grouper, sort=self.sort)
425423
uniques = Index(uniques, name=self.name)
426-
self._labels = labels
424+
self._codes = codes
427425
self._group_index = uniques
428426

429427
@cache_readonly
430428
def groups(self):
431-
return self.index.groupby(Categorical.from_codes(self.labels, self.group_index))
429+
return self.index.groupby(Categorical.from_codes(self.codes, self.group_index))
432430

433431

434432
def _get_grouper(

pandas/core/groupby/ops.py

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def _get_group_keys(self):
133133
comp_ids, _, ngroups = self.group_info
134134

135135
# provide "flattened" iterator for multi-group setting
136-
return get_flattened_iterator(comp_ids, ngroups, self.levels, self.labels)
136+
return get_flattened_iterator(comp_ids, ngroups, self.levels, self.codes)
137137

138138
def apply(self, f, data, axis=0):
139139
mutated = self.mutated
@@ -204,13 +204,13 @@ def indices(self):
204204
if len(self.groupings) == 1:
205205
return self.groupings[0].indices
206206
else:
207-
label_list = [ping.labels for ping in self.groupings]
207+
codes_list = [ping.codes for ping in self.groupings]
208208
keys = [com.values_from_object(ping.group_index) for ping in self.groupings]
209-
return get_indexer_dict(label_list, keys)
209+
return get_indexer_dict(codes_list, keys)
210210

211211
@property
212-
def labels(self):
213-
return [ping.labels for ping in self.groupings]
212+
def codes(self):
213+
return [ping.codes for ping in self.groupings]
214214

215215
@property
216216
def levels(self):
@@ -250,46 +250,46 @@ def is_monotonic(self):
250250

251251
@cache_readonly
252252
def group_info(self):
253-
comp_ids, obs_group_ids = self._get_compressed_labels()
253+
comp_ids, obs_group_ids = self._get_compressed_codes()
254254

255255
ngroups = len(obs_group_ids)
256256
comp_ids = ensure_int64(comp_ids)
257257
return comp_ids, obs_group_ids, ngroups
258258

259259
@cache_readonly
260-
def label_info(self):
261-
# return the labels of items in original grouped axis
262-
labels, _, _ = self.group_info
260+
def codes_info(self):
261+
# return the codes of items in original grouped axis
262+
codes, _, _ = self.group_info
263263
if self.indexer is not None:
264-
sorter = np.lexsort((labels, self.indexer))
265-
labels = labels[sorter]
266-
return labels
267-
268-
def _get_compressed_labels(self):
269-
all_labels = [ping.labels for ping in self.groupings]
270-
if len(all_labels) > 1:
271-
group_index = get_group_index(all_labels, self.shape, sort=True, xnull=True)
264+
sorter = np.lexsort((codes, self.indexer))
265+
codes = codes[sorter]
266+
return codes
267+
268+
def _get_compressed_codes(self):
269+
all_codes = [ping.codes for ping in self.groupings]
270+
if len(all_codes) > 1:
271+
group_index = get_group_index(all_codes, self.shape, sort=True, xnull=True)
272272
return compress_group_index(group_index, sort=self.sort)
273273

274274
ping = self.groupings[0]
275-
return ping.labels, np.arange(len(ping.group_index))
275+
return ping.codes, np.arange(len(ping.group_index))
276276

277277
@cache_readonly
278278
def ngroups(self):
279279
return len(self.result_index)
280280

281281
@property
282-
def recons_labels(self):
282+
def recons_codes(self):
283283
comp_ids, obs_ids, _ = self.group_info
284-
labels = (ping.labels for ping in self.groupings)
285-
return decons_obs_group_ids(comp_ids, obs_ids, self.shape, labels, xnull=True)
284+
codes = (ping.codes for ping in self.groupings)
285+
return decons_obs_group_ids(comp_ids, obs_ids, self.shape, codes, xnull=True)
286286

287287
@cache_readonly
288288
def result_index(self):
289289
if not self.compressed and len(self.groupings) == 1:
290290
return self.groupings[0].result_index.rename(self.names[0])
291291

292-
codes = self.recons_labels
292+
codes = self.recons_codes
293293
levels = [ping.result_index for ping in self.groupings]
294294
result = MultiIndex(
295295
levels=levels, codes=codes, verify_integrity=False, names=self.names
@@ -301,9 +301,9 @@ def get_group_levels(self):
301301
return [self.groupings[0].result_index]
302302

303303
name_list = []
304-
for ping, labels in zip(self.groupings, self.recons_labels):
305-
labels = ensure_platform_int(labels)
306-
levels = ping.result_index.take(labels)
304+
for ping, codes in zip(self.groupings, self.recons_codes):
305+
codes = ensure_platform_int(codes)
306+
levels = ping.result_index.take(codes)
307307

308308
name_list.append(levels)
309309

@@ -482,15 +482,15 @@ def _cython_operation(self, kind: str, values, how, axis, min_count=-1, **kwargs
482482
else:
483483
out_dtype = "object"
484484

485-
labels, _, _ = self.group_info
485+
codes, _, _ = self.group_info
486486

487487
if kind == "aggregate":
488488
result = _maybe_fill(
489489
np.empty(out_shape, dtype=out_dtype), fill_value=np.nan
490490
)
491491
counts = np.zeros(self.ngroups, dtype=np.int64)
492492
result = self._aggregate(
493-
result, counts, values, labels, func, is_datetimelike, min_count
493+
result, counts, values, codes, func, is_datetimelike, min_count
494494
)
495495
elif kind == "transform":
496496
result = _maybe_fill(
@@ -499,7 +499,7 @@ def _cython_operation(self, kind: str, values, how, axis, min_count=-1, **kwargs
499499

500500
# TODO: min_count
501501
result = self._transform(
502-
result, values, labels, func, is_datetimelike, **kwargs
502+
result, values, codes, func, is_datetimelike, **kwargs
503503
)
504504

505505
if is_integer_dtype(result) and not is_datetimelike:

pandas/tests/groupby/test_grouping.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -559,12 +559,12 @@ def test_level_preserve_order(self, sort, labels, mframe):
559559
# GH 17537
560560
grouped = mframe.groupby(level=0, sort=sort)
561561
exp_labels = np.array(labels, np.intp)
562-
tm.assert_almost_equal(grouped.grouper.labels[0], exp_labels)
562+
tm.assert_almost_equal(grouped.grouper.codes[0], exp_labels)
563563

564564
def test_grouping_labels(self, mframe):
565565
grouped = mframe.groupby(mframe.index.get_level_values(0))
566566
exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], dtype=np.intp)
567-
tm.assert_almost_equal(grouped.grouper.labels[0], exp_labels)
567+
tm.assert_almost_equal(grouped.grouper.codes[0], exp_labels)
568568

569569
def test_list_grouper_with_nat(self):
570570
# GH 14715

pandas/util/testing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -621,8 +621,8 @@ def _check_types(l, r, obj="Index"):
621621
def _get_ilevel_values(index, level):
622622
# accept level number only
623623
unique = index.levels[level]
624-
labels = index.codes[level]
625-
filled = take_1d(unique.values, labels, fill_value=unique._na_value)
624+
level_codes = index.codes[level]
625+
filled = take_1d(unique.values, level_codes, fill_value=unique._na_value)
626626
values = unique._shallow_copy(filled, name=index.names[level])
627627
return values
628628

0 commit comments

Comments
 (0)