Closed
Description
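The internal implementation of Categorical.value_counts
should simply do what the snippet below does: count the integer codes with c.cat.codes.value_counts(), map those counts back onto the categories, and sort in descending order. I think the current implementation factorizes the values multiple times when that is not necessary; in the session below the codes-based approach gives an identical result in 17.2 ms versus 62.3 ms for c.value_counts().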
In [32]: np.random.seed(1234)
In [33]: n = 500000
In [34]: u = int(0.1*n)
In [35]: arr = [ "s%04d" % i for i in np.random.randint(0,u,size=n) ]
In [36]: c = pd.Series(arr).astype('category')
In [37]: result1 = Series(np.arange(len(c.cat.categories)),c.cat.categories).map(c.cat.codes.value_counts()).order(ascending=False)
In [38]: result2 = c.value_counts()
In [39]: %timeit Series(np.arange(len(c.cat.categories)),c.cat.categories).map(c.cat.codes.value_counts()).order(ascending=False)
100 loops, best of 3: 17.2 ms per loop
In [40]: %timeit c.value_counts()
10 loops, best of 3: 62.3 ms per loop
In [41]: result1.equals(result2)
Out[41]: True