Skip to content

Pure python multi-key groupby can't handle non-numeric results #612

Closed
@wesm

Description

@wesm

Reported by @yarikoptic. This could occur fairly easily in 0.6.1; it is now very pathological to reproduce.

# Sample frame for the reproduction: 11 rows, three string columns
# ('A', 'B', 'C') and three columns of random normal floats ('D', 'E', 'F').
# NOTE(review): the snippet assumes `np` (numpy) and `DataFrame` (pandas) are
# already imported in the session -- the imports are not shown here.
data = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo',
                                      'bar', 'bar', 'bar', 'bar',
                                      'foo', 'foo', 'foo'],
                               'B' : ['one', 'one', 'one', 'two',
                                      'one', 'one', 'one', 'two',
                                      'two', 'two', 'one'],
                               'C' : ['dull', 'dull', 'shiny', 'dull',
                                      'dull', 'shiny', 'shiny', 'dull',
                                      'shiny', 'shiny', 'shiny'],
                               'D' : np.random.randn(11),
                               'E' : np.random.randn(11),
                               'F' : np.random.randn(11)})

def bad(x):
    """Aggregation function for the repro: always returns the string 'foo'.

    Asserts that ``x.base`` has the same length as ``x`` before returning,
    so the result of every group is a non-numeric value.
    """
    # NOTE(review): presumably this assertion is meant to fail inside the
    # fast aggregation path, so that the `except Exception` fallback to
    # `_aggregate_series_pure_python` (visible in the traceback) is taken
    # -- TODO confirm.
    assert(len(x.base) == len(x))
    return 'foo'

# Group on two keys and aggregate with `bad`; per the traceback that follows,
# this ends in `_aggregate_series_pure_python` and raises
# "ValueError: could not convert string to float: foo".
data.groupby(['A', 'B']).agg(bad)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/home/wesm/code/pandas/vb_suite/<ipython-input-23-745606b0da15> in <module>()
----> 1 data.groupby(['A', 'B']).agg(bad)

/home/wesm/code/pandas/pandas/core/groupby.pyc in agg(self, func, *args, **kwargs)
    283         See docstring for aggregate
    284         """
--> 285         return self.aggregate(func, *args, **kwargs)
    286 
    287     def _iterate_slices(self):

/home/wesm/code/pandas/pandas/core/groupby.pyc in aggregate(self, arg, *args, **kwargs)
    963         else:
    964             if len(self.groupings) > 1:
--> 965                 return self._python_agg_general(arg, *args, **kwargs)
    966             else:
    967                 result = self._aggregate_generic(arg, *args, **kwargs)

/home/wesm/code/pandas/pandas/core/groupby.pyc in _python_agg_general(self, func, *args, **kwargs)
    393             try:
    394                 result, counts = self._aggregate_series(obj, agg_func,
--> 395                                                         comp_ids, max_group)
    396                 output[name] = result                                                                                                 
    397             except TypeError:

/home/wesm/code/pandas/pandas/core/groupby.pyc in _aggregate_series(self, obj, func, group_index, ngroups)
    411             return _aggregate_series_fast(obj, func, group_index, ngroups)
    412         except Exception:
--> 413             return self._aggregate_series_pure_python(obj, func, ngroups)
    414 
    415     def _aggregate_series_pure_python(self, obj, func, ngroups):

/home/wesm/code/pandas/pandas/core/groupby.pyc in _aggregate_series_pure_python(self, obj, func, ngroups)
    422                 continue
    423             counts[label] = group.shape[0]
--> 424             result[label] = func(group)
    425 
    426         return result, counts

ValueError: could not convert string to float: foo

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions