Skip to content

BUG: Putting a Categorical series in a DataFrame with a different index raises IndexError #8076

Closed
@shoyer

Description

@shoyer
>>> import pandas as pd
>>> pd.DataFrame({'x': pd.Series(['a', 'b', 'c'])}, index=pd.date_range('20000101', periods=3))
              x
2000-01-01  NaN
2000-01-02  NaN
2000-01-03  NaN
>>> df = pd.DataFrame({'x': pd.Series(pd.Categorical(['a', 'b', 'c']))}, index=pd.date_range('20000101', periods=3))
>>> df
<repr(<pandas.core.frame.DataFrame at 0x107f09f50>) failed: IndexError: Out of bounds on buffer access (axis 0)>
>>> df.values
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-13-e8bb9a30bd4f> in <module>()
----> 1 df.values

/Users/shoyer/dev/pandas/pandas/core/generic.py in values(self)
   2071         int32.
   2072         """
-> 2073         return self.as_matrix()
   2074 
   2075     @property

/Users/shoyer/dev/pandas/pandas/core/generic.py in as_matrix(self, columns)
   2053         self._consolidate_inplace()
   2054         if self._AXIS_REVERSED:
-> 2055             return self._data.as_matrix(columns).T
   2056         return self._data.as_matrix(columns)
   2057 

/Users/shoyer/dev/pandas/pandas/core/internals.py in as_matrix(self, items)
   2676 
   2677         if self._is_single_block or not self.is_mixed_type:
-> 2678             return mgr.blocks[0].get_values()
   2679         else:
   2680             return mgr._interleave()

/Users/shoyer/dev/pandas/pandas/core/internals.py in get_values(self, dtype)
   1079     def get_values(self, dtype=None):
   1080         """ need to to_dense myself (and always return a ndim sized object) """
-> 1081         values = self.values.to_dense()
   1082         if values.ndim == self.ndim - 1:
   1083             values = values.reshape((1,) + values.shape)

/Users/shoyer/dev/pandas/pandas/core/categorical.py in to_dense(self)
    683     def to_dense(self):
    684         """ Return my 'dense' repr """
--> 685         return np.asarray(self)
    686 
    687     def fillna(self, fill_value=None, method=None, limit=None, **kwargs):

/Users/shoyer/miniconda/envs/pandas-dev/lib/python2.7/site-packages/numpy/core/numeric.pyc in asarray(a, dtype, order)
    458 
    459     """
--> 460     return array(a, dtype, copy=False, order=order)
    461 
    462 def asanyarray(a, dtype=None, order=None):

/Users/shoyer/dev/pandas/pandas/core/categorical.py in __array__(self, dtype)
    484             dtype as categorical.levels.dtype
    485         """
--> 486         ret = com.take_1d(self.levels.values, self._codes)
    487         if dtype and dtype != self.levels.dtype:
    488             return np.asarray(ret, dtype)

/Users/shoyer/dev/pandas/pandas/core/common.py in take_nd(arr, indexer, axis, out, fill_value, mask_info, allow_fill)
    805                                  axis=axis, mask_info=mask_info)
    806 
--> 807     func(arr, indexer, out, fill_value)
    808 
    809     if flip_order:

/Users/shoyer/dev/pandas/pandas/algos.so in pandas.algos.take_1d_object_object (pandas/algos.c:78943)()

IndexError: Out of bounds on buffer access (axis 0)

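This is on pandas master. I would expect the categorical case to behave like the object-dtype example at the top: because the Series index does not align with the DataFrame's date index, the column should come out as all NaN. Instead, the DataFrame is constructed without error, but its repr and `.values` raise `IndexError`.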

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions