Skip to content

Commit c6dac01

Browse files
committed
rename labels -> codes in core/groupby/
1 parent cacc9e2 commit c6dac01

File tree

6 files changed

+61
-63
lines changed

6 files changed

+61
-63
lines changed

pandas/core/groupby/generic.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -655,16 +655,16 @@ def value_counts(
655655
rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))
656656

657657
# multi-index components
658-
labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)]
658+
codes = list(map(rep, self.grouper.recons_codes)) + [llab(lab, inc)]
659659
levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
660660
names = self.grouper.names + [self._selection_name]
661661

662662
if dropna:
663-
mask = labels[-1] != -1
663+
mask = codes[-1] != -1
664664
if mask.all():
665665
dropna = False
666666
else:
667-
out, labels = out[mask], [label[mask] for label in labels]
667+
out, codes = out[mask], [level_codes[mask] for level_codes in codes]
668668

669669
if normalize:
670670
out = out.astype("float")
@@ -680,11 +680,11 @@ def value_counts(
680680
if sort and bins is None:
681681
cat = ids[inc][mask] if dropna else ids[inc]
682682
sorter = np.lexsort((out if ascending else -out, cat))
683-
out, labels[-1] = out[sorter], labels[-1][sorter]
683+
out, codes[-1] = out[sorter], codes[-1][sorter]
684684

685685
if bins is None:
686686
mi = MultiIndex(
687-
levels=levels, codes=labels, names=names, verify_integrity=False
687+
levels=levels, codes=codes, names=names, verify_integrity=False
688688
)
689689

690690
if is_integer_dtype(out):
@@ -694,14 +694,14 @@ def value_counts(
694694
# for compat. with libgroupby.value_counts need to ensure every
695695
# bin is present at every index level, null filled with zeros
696696
diff = np.zeros(len(out), dtype="bool")
697-
for lab in labels[:-1]:
698-
diff |= np.r_[True, lab[1:] != lab[:-1]]
697+
for codes_ in codes[:-1]:
698+
diff |= np.r_[True, codes_[1:] != codes_[:-1]]
699699

700700
ncat, nbin = diff.sum(), len(levels[-1])
701701

702702
left = [np.repeat(np.arange(ncat), nbin), np.tile(np.arange(nbin), ncat)]
703703

704-
right = [diff.cumsum() - 1, labels[-1]]
704+
right = [diff.cumsum() - 1, codes[-1]]
705705

706706
_, idx = _get_join_indexers(left, right, sort=False, how="left")
707707
out = np.where(idx != -1, out[idx], 0)
@@ -711,7 +711,7 @@ def value_counts(
711711
out, left[-1] = out[sorter], left[-1][sorter]
712712

713713
# build the multi-index w/ full levels
714-
codes = list(map(lambda lab: np.repeat(lab[diff], nbin), labels[:-1]))
714+
codes = list(map(lambda codes: np.repeat(codes[diff], nbin), codes[:-1]))
715715
codes.append(left[-1])
716716

717717
mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False)
@@ -758,7 +758,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None):
758758
)
759759
)
760760
filled = getattr(self, fill_method)(limit=limit)
761-
fill_grp = filled.groupby(self.grouper.labels)
761+
fill_grp = filled.groupby(self.grouper.codes)
762762
shifted = fill_grp.shift(periods=periods, freq=freq)
763763

764764
return (filled / shifted) - 1

pandas/core/groupby/groupby.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2338,7 +2338,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, axis=0
23382338
)
23392339
)
23402340
filled = getattr(self, fill_method)(limit=limit)
2341-
fill_grp = filled.groupby(self.grouper.labels)
2341+
fill_grp = filled.groupby(self.grouper.codes)
23422342
shifted = fill_grp.shift(periods=periods, freq=freq)
23432343
return (filled / shifted) - 1
23442344

pandas/core/groupby/grouper.py

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class Grouper:
5959
<http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_.
6060
axis : number/name of the axis, defaults to 0
6161
sort : bool, default to False
62-
whether to sort the resulting labels
62+
whether to sort the resulting codes
6363
closed : {'left' or 'right'}
6464
Closed end of interval. Only when `freq` parameter is passed.
6565
label : {'left' or 'right'}
@@ -231,7 +231,7 @@ class Grouping:
231231
obj :
232232
name :
233233
level :
234-
observed : boolean, default False
234+
observed : bool, default False
235235
If we are a Categorical, use the observed values
236236
in_axis : if the Grouping is a column in self.obj and hence among
237237
Groupby.exclusions list
@@ -240,9 +240,7 @@ class Grouping:
240240
-------
241241
**Attributes**:
242242
* indices : dict of {group -> index_list}
243-
* labels : ndarray, group labels
244-
* ids : mapping of label -> group
245-
* counts : array of group counts
243+
* codes : ndarray, group codes
246244
* group_index : unique groups
247245
* groups : dict of {group -> label_list}
248246
"""
@@ -290,12 +288,12 @@ def __init__(
290288
if self.name is None:
291289
self.name = index.names[level]
292290

293-
self.grouper, self._labels, self._group_index = index._get_grouper_for_level( # noqa: E501
291+
self.grouper, self._codes, self._group_index = index._get_grouper_for_level( # noqa: E501
294292
self.grouper, level
295293
)
296294

297295
# a passed Grouper like, directly get the grouper in the same way
298-
# as single grouper groupby, use the group_info to get labels
296+
# as single grouper groupby, use the group_info to get codes
299297
elif isinstance(self.grouper, Grouper):
300298
# get the new grouper; we already have disambiguated
301299
# what key/level refer to exactly, don't need to
@@ -324,7 +322,7 @@ def __init__(
324322

325323
# we make a CategoricalIndex out of the cat grouper
326324
# preserving the categories / ordered attributes
327-
self._labels = self.grouper.codes
325+
self._codes = self.grouper.codes
328326
if observed:
329327
codes = algorithms.unique1d(self.grouper.codes)
330328
codes = codes[codes != -1]
@@ -380,7 +378,7 @@ def __repr__(self):
380378
def __iter__(self):
381379
return iter(self.indices)
382380

383-
_labels = None
381+
_codes = None
384382
_group_index = None
385383

386384
@property
@@ -397,10 +395,10 @@ def indices(self):
397395
return values._reverse_indexer()
398396

399397
@property
400-
def labels(self):
401-
if self._labels is None:
402-
self._make_labels()
403-
return self._labels
398+
def codes(self):
399+
if self._codes is None:
400+
self._make_codes()
401+
return self._codes
404402

405403
@cache_readonly
406404
def result_index(self):
@@ -411,24 +409,24 @@ def result_index(self):
411409
@property
412410
def group_index(self):
413411
if self._group_index is None:
414-
self._make_labels()
412+
self._make_codes()
415413
return self._group_index
416414

417-
def _make_labels(self):
418-
if self._labels is None or self._group_index is None:
415+
def _make_codes(self):
416+
if self._codes is None or self._group_index is None:
419417
# we have a list of groupers
420418
if isinstance(self.grouper, BaseGrouper):
421-
labels = self.grouper.label_info
419+
codes = self.grouper.codes_info
422420
uniques = self.grouper.result_index
423421
else:
424-
labels, uniques = algorithms.factorize(self.grouper, sort=self.sort)
422+
codes, uniques = algorithms.factorize(self.grouper, sort=self.sort)
425423
uniques = Index(uniques, name=self.name)
426-
self._labels = labels
424+
self._codes = codes
427425
self._group_index = uniques
428426

429427
@cache_readonly
430428
def groups(self):
431-
return self.index.groupby(Categorical.from_codes(self.labels, self.group_index))
429+
return self.index.groupby(Categorical.from_codes(self.codes, self.group_index))
432430

433431

434432
def _get_grouper(

pandas/core/groupby/ops.py

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def _get_group_keys(self):
133133
comp_ids, _, ngroups = self.group_info
134134

135135
# provide "flattened" iterator for multi-group setting
136-
return get_flattened_iterator(comp_ids, ngroups, self.levels, self.labels)
136+
return get_flattened_iterator(comp_ids, ngroups, self.levels, self.codes)
137137

138138
def apply(self, f, data, axis: int = 0):
139139
mutated = self.mutated
@@ -204,13 +204,13 @@ def indices(self):
204204
if len(self.groupings) == 1:
205205
return self.groupings[0].indices
206206
else:
207-
label_list = [ping.labels for ping in self.groupings]
207+
codes_list = [ping.codes for ping in self.groupings]
208208
keys = [com.values_from_object(ping.group_index) for ping in self.groupings]
209-
return get_indexer_dict(label_list, keys)
209+
return get_indexer_dict(codes_list, keys)
210210

211211
@property
212-
def labels(self):
213-
return [ping.labels for ping in self.groupings]
212+
def codes(self):
213+
return [ping.codes for ping in self.groupings]
214214

215215
@property
216216
def levels(self):
@@ -250,46 +250,46 @@ def is_monotonic(self) -> bool:
250250

251251
@cache_readonly
252252
def group_info(self):
253-
comp_ids, obs_group_ids = self._get_compressed_labels()
253+
comp_ids, obs_group_ids = self._get_compressed_codes()
254254

255255
ngroups = len(obs_group_ids)
256256
comp_ids = ensure_int64(comp_ids)
257257
return comp_ids, obs_group_ids, ngroups
258258

259259
@cache_readonly
260-
def label_info(self):
261-
# return the labels of items in original grouped axis
262-
labels, _, _ = self.group_info
260+
def codes_info(self):
261+
# return the codes of items in original grouped axis
262+
codes, _, _ = self.group_info
263263
if self.indexer is not None:
264-
sorter = np.lexsort((labels, self.indexer))
265-
labels = labels[sorter]
266-
return labels
267-
268-
def _get_compressed_labels(self):
269-
all_labels = [ping.labels for ping in self.groupings]
270-
if len(all_labels) > 1:
271-
group_index = get_group_index(all_labels, self.shape, sort=True, xnull=True)
264+
sorter = np.lexsort((codes, self.indexer))
265+
codes = codes[sorter]
266+
return codes
267+
268+
def _get_compressed_codes(self):
269+
all_codes = [ping.codes for ping in self.groupings]
270+
if len(all_codes) > 1:
271+
group_index = get_group_index(all_codes, self.shape, sort=True, xnull=True)
272272
return compress_group_index(group_index, sort=self.sort)
273273

274274
ping = self.groupings[0]
275-
return ping.labels, np.arange(len(ping.group_index))
275+
return ping.codes, np.arange(len(ping.group_index))
276276

277277
@cache_readonly
278278
def ngroups(self) -> int:
279279
return len(self.result_index)
280280

281281
@property
282-
def recons_labels(self):
282+
def recons_codes(self):
283283
comp_ids, obs_ids, _ = self.group_info
284-
labels = (ping.labels for ping in self.groupings)
285-
return decons_obs_group_ids(comp_ids, obs_ids, self.shape, labels, xnull=True)
284+
codes = (ping.codes for ping in self.groupings)
285+
return decons_obs_group_ids(comp_ids, obs_ids, self.shape, codes, xnull=True)
286286

287287
@cache_readonly
288288
def result_index(self):
289289
if not self.compressed and len(self.groupings) == 1:
290290
return self.groupings[0].result_index.rename(self.names[0])
291291

292-
codes = self.recons_labels
292+
codes = self.recons_codes
293293
levels = [ping.result_index for ping in self.groupings]
294294
result = MultiIndex(
295295
levels=levels, codes=codes, verify_integrity=False, names=self.names
@@ -301,9 +301,9 @@ def get_group_levels(self):
301301
return [self.groupings[0].result_index]
302302

303303
name_list = []
304-
for ping, labels in zip(self.groupings, self.recons_labels):
305-
labels = ensure_platform_int(labels)
306-
levels = ping.result_index.take(labels)
304+
for ping, codes in zip(self.groupings, self.recons_codes):
305+
codes = ensure_platform_int(codes)
306+
levels = ping.result_index.take(codes)
307307

308308
name_list.append(levels)
309309

@@ -484,15 +484,15 @@ def _cython_operation(
484484
else:
485485
out_dtype = "object"
486486

487-
labels, _, _ = self.group_info
487+
codes, _, _ = self.group_info
488488

489489
if kind == "aggregate":
490490
result = _maybe_fill(
491491
np.empty(out_shape, dtype=out_dtype), fill_value=np.nan
492492
)
493493
counts = np.zeros(self.ngroups, dtype=np.int64)
494494
result = self._aggregate(
495-
result, counts, values, labels, func, is_datetimelike, min_count
495+
result, counts, values, codes, func, is_datetimelike, min_count
496496
)
497497
elif kind == "transform":
498498
result = _maybe_fill(
@@ -501,7 +501,7 @@ def _cython_operation(
501501

502502
# TODO: min_count
503503
result = self._transform(
504-
result, values, labels, func, is_datetimelike, **kwargs
504+
result, values, codes, func, is_datetimelike, **kwargs
505505
)
506506

507507
if is_integer_dtype(result) and not is_datetimelike:

pandas/tests/groupby/test_grouping.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -559,12 +559,12 @@ def test_level_preserve_order(self, sort, labels, mframe):
559559
# GH 17537
560560
grouped = mframe.groupby(level=0, sort=sort)
561561
exp_labels = np.array(labels, np.intp)
562-
tm.assert_almost_equal(grouped.grouper.labels[0], exp_labels)
562+
tm.assert_almost_equal(grouped.grouper.codes[0], exp_labels)
563563

564564
def test_grouping_labels(self, mframe):
565565
grouped = mframe.groupby(mframe.index.get_level_values(0))
566566
exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], dtype=np.intp)
567-
tm.assert_almost_equal(grouped.grouper.labels[0], exp_labels)
567+
tm.assert_almost_equal(grouped.grouper.codes[0], exp_labels)
568568

569569
def test_list_grouper_with_nat(self):
570570
# GH 14715

pandas/util/testing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -621,8 +621,8 @@ def _check_types(l, r, obj="Index"):
621621
def _get_ilevel_values(index, level):
622622
# accept level number only
623623
unique = index.levels[level]
624-
labels = index.codes[level]
625-
filled = take_1d(unique.values, labels, fill_value=unique._na_value)
624+
level_codes = index.codes[level]
625+
filled = take_1d(unique.values, level_codes, fill_value=unique._na_value)
626626
values = unique._shallow_copy(filled, name=index.names[level])
627627
return values
628628

0 commit comments

Comments
 (0)