Closed
Description
Why doesn't the pandas.Series.count() method work as a valid aggregation with groupby when as_index=False?
df=pd.DataFrame([['foo','foo','bar','bar','bar','oats'],[1.0,2.0,3.0,4.0,4.0,5.0],[2.0,3.0,4.0,5.0,1.0,5.0]]).T
df.columns=['mycat','var1','var2']
df.var1=df.var1.astype('int64')
df.var2=df.var2.astype('int64')
df
Now, if I try to do a groupby with as_index=False:
df.groupby('mycat', as_index=False).var1.count()
Here is the error I get:
ValueError Traceback (most recent call last)
<ipython-input-383-27b244bccb8b> in <module>()
----> 1 df.groupby('mycat', as_index=False).var1.count()
/usr/local/lib/python2.7/dist-packages/pandas/core/groupby.pyc in count(self, axis)
740
741 def count(self, axis=0):
--> 742 return self._count().astype('int64')
743
744 def ohlc(self):
/usr/local/lib/python2.7/dist-packages/pandas/core/generic.pyc in astype(self, dtype, copy, raise_on_error)
2096
2097 mgr = self._data.astype(
-> 2098 dtype=dtype, copy=copy, raise_on_error=raise_on_error)
2099 return self._constructor(mgr).__finalize__(self)
2100
/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in astype(self, dtype, **kwargs)
2235
2236 def astype(self, dtype, **kwargs):
-> 2237 return self.apply('astype', dtype=dtype, **kwargs)
2238
2239 def convert(self, **kwargs):
/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in apply(self, f, axes, filter, do_integrity_check, **kwargs)
2190 copy=align_copy)
2191
-> 2192 applied = getattr(b, f)(**kwargs)
2193
2194 if isinstance(applied, list):
/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in astype(self, dtype, copy, raise_on_error, values)
319 def astype(self, dtype, copy=False, raise_on_error=True, values=None):
320 return self._astype(dtype, copy=copy, raise_on_error=raise_on_error,
--> 321 values=values)
322
323 def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in _astype(self, dtype, copy, raise_on_error, values, klass)
337 if values is None:
338 # _astype_nansafe works fine with 1-d only
--> 339 values = com._astype_nansafe(self.values.ravel(), dtype, copy=True)
340 values = values.reshape(self.values.shape)
341 newb = make_block(values,
/usr/local/lib/python2.7/dist-packages/pandas/core/common.pyc in _astype_nansafe(arr, dtype, copy)
2410 elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer):
2411 # work around NumPy brokenness, #1987
-> 2412 return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)
2413 elif issubclass(dtype.type, compat.string_types):
2414 return lib.astype_str(arr.ravel()).reshape(arr.shape)
/usr/local/lib/python2.7/dist-packages/pandas/lib.so in pandas.lib.astype_intsafe (pandas/lib.c:13456)()
/usr/local/lib/python2.7/dist-packages/pandas/lib.so in util.set_value_at (pandas/lib.c:55994)()
ValueError: invalid literal for long() with base 10: 'bar'
When I set as_index=True, I get:
df.groupby('mycat', as_index=True).var1.count()
When I change the aggregation function and keep as_index=False, I get a weird result too:
df.groupby('mycat', as_index=False).var1.agg(np.count_nonzero)
UPDATE: I realized that my last result was not counting correctly either, and I am now thoroughly confused.