Skip to content

Categorical cannot be used as a key in merges #10832

Closed
@jankatins

Description

@jankatins
df1 = pd.DataFrame({"a": list("ABCD"), "b": [1,2,3,4]})
df2 = pd.DataFrame({"a": list("BCDF"), "b": [1,2,3,4]})
df1["c"] = df1["a"].astype("category")
df2["c"] = df2["a"].astype("category")
pd.merge(df1, df2, on=["c"], how="outer")

Results in:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-97-42d4959051b3> in <module>()
      3 df1["c"] = df1["a"].astype("category")
      4 df2["c"] = df2["a"].astype("category")
----> 5 pd.merge(df1, df2, on=["c"], how="outer")

C:\portabel\miniconda\envs\sc\lib\site-packages\pandas\tools\merge.pyc in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy)
     36                          right_index=right_index, sort=sort, suffixes=suffixes,
     37                          copy=copy)
---> 38     return op.get_result()
     39 if __debug__:
     40     merge.__doc__ = _merge_doc % '\nleft : DataFrame'

C:\portabel\miniconda\envs\sc\lib\site-packages\pandas\tools\merge.pyc in get_result(self)
    203         result = typ(result_data).__finalize__(self, method='merge')
    204 
--> 205         self._maybe_add_join_keys(result, left_indexer, right_indexer)
    206 
    207         return result

C:\portabel\miniconda\envs\sc\lib\site-packages\pandas\tools\merge.pyc in _maybe_add_join_keys(self, result, left_indexer, right_indexer)
    228                         key_col.put(
    229                             na_indexer, com.take_1d(self.right_join_keys[i],
--> 230                                                     right_na_indexer))
    231                     elif name in self.right:
    232                         na_indexer = (right_indexer == -1).nonzero()[0]

C:\portabel\miniconda\envs\sc\lib\site-packages\pandas\core\common.pyc in take_nd(arr, indexer, axis, out, fill_value, mask_info, allow_fill)
    829         out_shape[axis] = len(indexer)
    830         out_shape = tuple(out_shape)
--> 831         if arr.flags.f_contiguous and axis == arr.ndim - 1:
    832             # minor tweak that can make an order-of-magnitude difference
    833             # for dataframes initialized directly from 2-d ndarrays

AttributeError: 'Categorical' object has no attribute 'flags'
pd.__version__
'0.16.2'

Metadata

Metadata

Assignees

No one assigned

    Labels

    Categorical (Categorical Data Type); Reshaping (Concat, Merge/Join, Stack/Unstack, Explode)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions