Closed
Description
When a categorical grouper is a column of a DataFrame with a range index, the order of the groups in the result depends only on whether the categorical is ordered; the `sort` argument to `groupby` has no effect. When the grouper is the index, the order of the groups depends only on the `sort` argument to `groupby`; whether the categorical is ordered has no effect.
Code
# Reproducer: compare the group order produced by ``groupby`` on a categorical
# key when the key is an ordinary column (the frame keeps its default range
# index) versus when the key has been moved into the index.
import numpy as np
from pandas import Categorical, DataFrame

# Categories are declared in descending order: 4, 3, 2, 1, 0.
categories = np.arange(4, -1, -1)

for range_index in [True, False]:
    # Key values appear in the order 2, 1, 2, 3 and "b" is 0..3, so the
    # per-group sum of "b" equals the key itself (2 -> 0 + 2, 1 -> 1, 3 -> 3).
    # The printed lists therefore show the order of the groups directly:
    # [3, 2, 1] is category order, [2, 1, 3] is order of first appearance.
    df_ordered = DataFrame(
        {
            "a": Categorical([2, 1, 2, 3], categories=categories, ordered=True),
            "b": range(4),
        }
    )
    df_unordered = DataFrame(
        {
            "a": Categorical([2, 1, 2, 3], categories=categories, ordered=False),
            "b": range(4),
        }
    )

    if not range_index:
        # Second pass: group by the same key, but as the index instead of
        # as a column.
        df_ordered = df_ordered.set_index('a')
        df_unordered = df_unordered.set_index('a')

    # All four combinations of ordered / unordered categorical and
    # sort=True / sort=False; observed=True drops the unused categories 4 and 0.
    gb_ordered_sort = df_ordered.groupby("a", sort=True, observed=True)
    gb_ordered_nosort = df_ordered.groupby("a", sort=False, observed=True)
    gb_unordered_sort = df_unordered.groupby("a", sort=True, observed=True)
    gb_unordered_nosort = df_unordered.groupby("a", sort=False, observed=True)

    print('Range Index:', range_index)
    print(' ordered=True, sort=True:', gb_ordered_sort.sum()["b"].tolist())
    print(' ordered=True, sort=False:', gb_ordered_nosort.sum()["b"].tolist())
    print(' ordered=False, sort=True:', gb_unordered_sort.sum()["b"].tolist())
    print(' ordered=False, sort=False:', gb_unordered_nosort.sum()["b"].tolist())
    print('---')
Range Index: True
ordered=True, sort=True: [3, 2, 1]
ordered=True, sort=False: [3, 2, 1]
ordered=False, sort=True: [2, 1, 3]
ordered=False, sort=False: [2, 1, 3]
---
Range Index: False
ordered=True, sort=True: [3, 2, 1]
ordered=True, sort=False: [2, 1, 3]
ordered=False, sort=True: [3, 2, 1]
ordered=False, sort=False: [2, 1, 3]
---