Closed
Description
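For example, concatenating two DataFrames whose object column contains only None turns those None values into NaN, even though the column dtype stays object: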
>>> import pandas as pd
>>> df1 = pd.DataFrame(data=[[1, None], [2, None]], columns=['a', 'b'])
>>> df2 = pd.DataFrame(data=[[3, None], [4, None]], columns=['a', 'b'])
>>> df1
a b
0 1 None
1 2 None
>>> df1.dtypes
a int64
b object
dtype: object
>>> df2
a b
0 3 None
1 4 None
>>> df2.dtypes
a int64
b object
dtype: object
>>> pd.concat([df1, df2])
a b
0 1 NaN
1 2 NaN
0 3 NaN
1 4 NaN
>>> pd.concat([df1, df2]).dtypes
a int64
b object
dtype: object
I have found that this is a direct result of lines 4102-4103 in core/internals.py, where the 'object' branch returns np.nan alongside the object dtype:
4101 # create the result
4102 if 'object' in upcast_classes:
4103 return np.dtype(np.object_), np.nan
4104 elif 'bool' in upcast_classes:
4105 if has_none_blocks:
4106 return np.dtype(np.object_), np.nan
4107 else:
4108 return np.dtype(np.bool_), None
4109 elif 'category' in upcast_classes:
4110 return com.CategoricalDtype(), np.nan
4111 elif 'float' in upcast_classes:
4112 return np.dtype(np.float64), np.nan
4113 elif 'datetime' in upcast_classes:
4114 return np.dtype('M8[ns]'), tslib.iNaT
4115 elif 'timedelta' in upcast_classes:
4116 return np.dtype('m8[ns]'), tslib.iNaT
4117 else: # pragma
4118 raise AssertionError("invalid dtype determination in get_concat_dtype")