Description
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- (optional) I have confirmed this bug exists on the master branch of pandas.
Code Sample, a copy-pastable example
In [1]: import numpy as np
In [2]: import pandas as pd
In [3]: right = pd.DataFrame(data={'C': [0.5274]}, index=pd.DatetimeIndex(['2021-04-08 21:21:14+00:00'], dtype='datetime64[ns, UTC]', name='Time (UTC)', freq=None))
In [4]: left = pd.DataFrame(data={'A': [None], 'B': [np.nan]}, index=pd.Index([None], dtype='object', name='Maybe Time (UTC)'))
In [5]: pd.concat([left, right], axis='columns')
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-5-27d2416870b3> in <module>
----> 1 pd.concat([left, right], axis='columns')
~/.miniconda/lib/python3.9/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
283 ValueError: Indexes have overlapping values: ['a']
284 """
--> 285 op = _Concatenator(
286 objs,
287 axis=axis,
~/.miniconda/lib/python3.9/site-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)
465 self.copy = copy
466
--> 467 self.new_axes = self._get_new_axes()
468
469 def get_result(self):
~/.miniconda/lib/python3.9/site-packages/pandas/core/reshape/concat.py in _get_new_axes(self)
535 def _get_new_axes(self) -> List[Index]:
536 ndim = self._get_result_dim()
--> 537 return [
538 self._get_concat_axis() if i == self.bm_axis else self._get_comb_axis(i)
539 for i in range(ndim)
~/.miniconda/lib/python3.9/site-packages/pandas/core/reshape/concat.py in <listcomp>(.0)
536 ndim = self._get_result_dim()
537 return [
--> 538 self._get_concat_axis() if i == self.bm_axis else self._get_comb_axis(i)
539 for i in range(ndim)
540 ]
~/.miniconda/lib/python3.9/site-packages/pandas/core/reshape/concat.py in _get_comb_axis(self, i)
542 def _get_comb_axis(self, i: int) -> Index:
543 data_axis = self.objs[0]._get_block_manager_axis(i)
--> 544 return get_objs_combined_axis(
545 self.objs,
546 axis=data_axis,
~/.miniconda/lib/python3.9/site-packages/pandas/core/indexes/api.py in get_objs_combined_axis(objs, intersect, axis, sort, copy)
90 """
91 obs_idxes = [obj._get_axis(axis) for obj in objs]
---> 92 return _get_combined_index(obs_idxes, intersect=intersect, sort=sort, copy=copy)
93
94
~/.miniconda/lib/python3.9/site-packages/pandas/core/indexes/api.py in _get_combined_index(indexes, intersect, sort, copy)
143 index = index.intersection(other)
144 else:
--> 145 index = union_indexes(indexes, sort=sort)
146 index = ensure_index(index)
147
~/.miniconda/lib/python3.9/site-packages/pandas/core/indexes/api.py in union_indexes(indexes, sort)
215 else:
216 for other in indexes[1:]:
--> 217 result = result.union(other)
218 return result
219 elif kind == "array":
~/.miniconda/lib/python3.9/site-packages/pandas/core/indexes/base.py in union(self, other, sort)
2698
2699 if not self._can_union_without_object_cast(other):
-> 2700 return self._union_incompatible_dtypes(other, sort=sort)
2701
2702 result = self._union(other, sort=sort)
~/.miniconda/lib/python3.9/site-packages/pandas/core/indexes/base.py in _union_incompatible_dtypes(self, other, sort)
2616 # cast to Index for when `other` is list-like
2617 other = Index(other).astype(object, copy=False)
-> 2618 return Index.union(this, other, sort=sort).astype(object, copy=False)
2619
2620 def _can_union_without_object_cast(self, other) -> bool:
~/.miniconda/lib/python3.9/site-packages/pandas/core/indexes/base.py in union(self, other, sort)
2702 result = self._union(other, sort=sort)
2703
-> 2704 return self._wrap_setop_result(other, result)
2705
2706 def _union(self, other, sort):
~/.miniconda/lib/python3.9/site-packages/pandas/core/indexes/base.py in _wrap_setop_result(self, other, result)
2786 return result
2787 else:
-> 2788 return self._shallow_copy(result, name=name)
2789
2790 # TODO: standardize return type of non-union setops type(self vs other)
~/.miniconda/lib/python3.9/site-packages/pandas/core/indexes/base.py in _shallow_copy(self, values, name)
524
525 if values is not None:
--> 526 return self._simple_new(values, name=name)
527
528 result = self._simple_new(self._values, name=name)
~/.miniconda/lib/python3.9/site-packages/pandas/core/indexes/base.py in _simple_new(cls, values, name)
431 Must be careful not to recurse.
432 """
--> 433 assert isinstance(values, np.ndarray), type(values)
434
435 result = object.__new__(cls)
AssertionError: <class 'pandas.core.arrays.datetimes.DatetimeArray'>
Problem description
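This appears to be a regression when concatenating DataFrames along the columns axis when their indexes are partially null: in the example above, `left` has an object-dtype index containing only `None`, while `right` has a timezone-aware `DatetimeIndex`. Instead of returning the combined frame, `pd.concat` raises an `AssertionError`. I don't know precisely in which version the regression was introduced, but the same code worked with pandas v0.25.3 on Python 3.6.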
Expected Output
Concatenated DataFrame from pandas v1.0.5:
                              A    B       C
NaN                        None  NaN     NaN
2021-04-08 21:21:14+00:00  None  NaN  0.5274
Output of pd.show_versions()
INSTALLED VERSIONS
commit : f2c8480
python : 3.9.2.final.0
python-bits : 64
OS : Darwin
OS-release : 16.7.0
Version : Darwin Kernel Version 16.7.0: Thu Dec 20 21:53:35 PST 2018; root:xnu-3789.73.31~1/RELEASE_X86_64
machine : x86_64
processor : i386
byteorder : little
LC_ALL : None
LANG : en_CA.UTF-8
LOCALE : en_CA.UTF-8
pandas : 1.2.3
numpy : 1.20.2
pytz : 2021.1
dateutil : 2.8.1
pip : 21.0.1
setuptools : 49.6.0.post20210108
Cython : None
pytest : 6.2.3
hypothesis : None
sphinx : 3.5.3
blosc : None
feather : None
xlsxwriter : 1.3.8
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 2.11.3
IPython : 7.22.0
pandas_datareader: None
bs4 : None
bottleneck : 1.3.2
fsspec : None
fastparquet : None
gcsfs : None
matplotlib : 3.3.4
numexpr : 2.7.3
odfpy : None
openpyxl : 3.0.7
pandas_gbq : None
pyarrow : None
pyxlsb : None
s3fs : None
scipy : 1.6.2
sqlalchemy : None
tables : 3.6.1
tabulate : None
xarray : 0.17.0
xlrd : 2.0.1
xlwt : None
numba : None