Skip to content

Commit 438a08d

Browse files
committed
BUG: Regression in merging Categorical and object dtypes (GH9426)
1 parent 1fab6fc commit 438a08d

File tree

4 files changed: 40 additions and 4 deletions.

doc/source/whatsnew/v0.16.0.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,7 @@ Bug Fixes
521521

522522

523523
- ``SparseSeries`` and ``SparsePanel`` now accept zero argument constructors (same as their non-sparse counterparts) (:issue:`9272`).
524-
524+
- Regression in merging ``Categorical`` and ``object`` dtypes (:issue:`9426`)
525525
- Bug in ``read_csv`` with buffer overflows with certain malformed input files (:issue:`9205`)
526526
- Bug in groupby MultiIndex with missing pair (:issue:`9049`, :issue:`9344`)
527527
- Fixed bug in ``Series.groupby`` where grouping on ``MultiIndex`` levels would ignore the sort argument (:issue:`9444`)

pandas/core/common.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1146,7 +1146,9 @@ def _maybe_promote(dtype, fill_value=np.nan):
11461146
dtype = np.object_
11471147

11481148
# in case we have a string that looked like a number
1149-
if issubclass(np.dtype(dtype).type, compat.string_types):
1149+
if is_categorical_dtype(dtype):
1150+
dtype = dtype
1151+
elif issubclass(np.dtype(dtype).type, compat.string_types):
11501152
dtype = np.object_
11511153

11521154
return dtype, fill_value

pandas/core/internals.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4327,8 +4327,9 @@ def dtype(self):
43274327
if not self.needs_filling:
43284328
return self.block.dtype
43294329
else:
4330-
return np.dtype(com._maybe_promote(self.block.dtype,
4331-
self.block.fill_value)[0])
4330+
return com._get_dtype(com._maybe_promote(self.block.dtype,
4331+
self.block.fill_value)[0])
4332+
43324333
return self._dtype
43334334

43344335
@cache_readonly

pandas/tests/test_categorical.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2423,6 +2423,39 @@ def f():
24232423
df.append(df_wrong_categories)
24242424
self.assertRaises(ValueError, f)
24252425

2426+
2427+
def test_merge(self):
2428+
# GH 9426
2429+
2430+
right = DataFrame({'c': {0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e'},
2431+
'd': {0: 'null', 1: 'null', 2: 'null', 3: 'null', 4: 'null'}})
2432+
left = DataFrame({'a': {0: 'f', 1: 'f', 2: 'f', 3: 'f', 4: 'f'},
2433+
'b': {0: 'g', 1: 'g', 2: 'g', 3: 'g', 4: 'g'}})
2434+
df = pd.merge(left, right, how='left', left_on='b', right_on='c')
2435+
2436+
# object-object
2437+
expected = df.copy()
2438+
2439+
# object-cat
2440+
cright = right.copy()
2441+
cright['d'] = cright['d'].astype('category')
2442+
result = pd.merge(left, cright, how='left', left_on='b', right_on='c')
2443+
tm.assert_frame_equal(result, expected)
2444+
2445+
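# cat-object
# NOTE(review): cright['d'] is still categorical from the object-cat case above, so the merge below duplicates the cat-cat case; merging cleft with the plain `right` frame looks like the intended cat-object check -- confirm.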
2446+
cleft = left.copy()
2447+
cleft['b'] = cleft['b'].astype('category')
2448+
result = pd.merge(cleft, cright, how='left', left_on='b', right_on='c')
2449+
tm.assert_frame_equal(result, expected)
2450+
2451+
# cat-cat
2452+
cright = right.copy()
2453+
cright['d'] = cright['d'].astype('category')
2454+
cleft = left.copy()
2455+
cleft['b'] = cleft['b'].astype('category')
2456+
result = pd.merge(cleft, cright, how='left', left_on='b', right_on='c')
2457+
tm.assert_frame_equal(result, expected)
2458+
24262459
def test_na_actions(self):
24272460

24282461
cat = pd.Categorical([1,2,3,np.nan], categories=[1,2,3])

0 commit comments

Comments
 (0)