Skip to content

read_msgpack can't parse categorical data #12570

Closed
@pwaller

Description

@pwaller

A copy-pastable code sample

import pandas
c = pandas.Categorical(["A", "B", "C", "D"], categories=["A", "B", "C", "D"])
df = pandas.DataFrame({"x": c})
bytes = df.to_msgpack()
pandas.read_msgpack(bytes)

What happens

<ipython-input-1-ec1dc8306f4c> in <module>()
      1 import pandas
      2 df = pandas.DataFrame({"x": pandas.Categorical(["A", "B", "C", "D"], categories=["A", "B", "C", "D"])})
----> 3 pandas.read_msgpack(df.to_msgpack())

/home/pwaller/.local/lib/python3.4/site-packages/pandas/io/packers.py in read_msgpack(path_or_buf, encoding, iterator, **kwargs)
    153         try:
    154             fh = compat.BytesIO(path_or_buf)
--> 155             return read(fh)
    156         finally:
    157             if fh is not None:

/home/pwaller/.local/lib/python3.4/site-packages/pandas/io/packers.py in read(fh)
    131 
    132     def read(fh):
--> 133         l = list(unpack(fh, encoding=encoding, **kwargs))
    134         if len(l) == 1:
    135             return l[0]

pandas/msgpack/_unpacker.pyx in pandas.msgpack._unpacker.Unpacker.__next__ (pandas/msgpack/_unpacker.cpp:5277)()

pandas/msgpack/_unpacker.pyx in pandas.msgpack._unpacker.Unpacker._unpack (pandas/msgpack/_unpacker.cpp:4268)()

/home/pwaller/.local/lib/python3.4/site-packages/pandas/io/packers.py in decode(obj)
    526                               placement=placement)
    527 
--> 528         blocks = [create_block(b) for b in obj['blocks']]
    529         return globals()[obj['klass']](BlockManager(blocks, axes))
    530     elif typ == 'datetime':

/home/pwaller/.local/lib/python3.4/site-packages/pandas/io/packers.py in <listcomp>(.0)
    526                               placement=placement)
    527 
--> 528         blocks = [create_block(b) for b in obj['blocks']]
    529         return globals()[obj['klass']](BlockManager(blocks, axes))
    530     elif typ == 'datetime':

/home/pwaller/.local/lib/python3.4/site-packages/pandas/io/packers.py in create_block(b)
    513 
    514         def create_block(b):
--> 515             values = unconvert(b['values'], dtype_for(b['dtype']),
    516                                b['compress']).reshape(b['shape'])
    517 

/home/pwaller/.local/lib/python3.4/site-packages/pandas/io/packers.py in dtype_for(t)
    181     if t in dtype_dict:
    182         return dtype_dict[t]
--> 183     return np.typeDict[t]
    184 
    185 c2f_dict = {'complex': np.float64,

KeyError: 'category'

Expected Output

A DataFrame containing a categorical variable x.

Output of pd.show_versions()

Note: I have also tested with 0.18.0rc1.

INSTALLED VERSIONS
------------------
commit: None
python: 3.4.3.final.0
python-bits: 64
OS: Linux
OS-release: 4.2.0-30-generic
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_GB.UTF-8

pandas: 0.17.1
nose: 1.3.6
pip: 8.1.0
setuptools: 20.0
Cython: 0.23.4
numpy: 1.10.2
scipy: 0.16.1
statsmodels: 0.6.1
IPython: 4.1.1
sphinx: None
patsy: 0.4.1
dateutil: 2.5.0
pytz: 2015.7
blosc: None
bottleneck: None
tables: 3.2.2
numexpr: 2.4.6
matplotlib: 1.5.0
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: 0.7.3
lxml: None
bs4: 4.3.2
html5lib: 0.999
httplib2: 0.9
apiclient: None
sqlalchemy: 1.0.11
pymysql: None
psycopg2: None
Jinja2: None

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions