Description
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
import pandas as pd
left = pd.DataFrame([[1, 2, 'a'], [5, 6, 'b'], [10, 11, 'c']], columns=['a', 'a', 'left_val'])
right = pd.DataFrame([[x] * 3 for x in [1, 2, 3, 6, 7]], columns=['a', 'a', 'right_val'])
print(pd.merge_asof(left, right, on="a"))
print(pd.merge_asof(left, right, left_on="a", right_on='right_val'))
print(pd.merge_asof(left, right, left_on="left_val", right_on='a'))
Issue Description
I get `AttributeError: 'DataFrame' object has no attribute 'dtype'` when I pass a non-unique `on`, `left_on`, or `right_on`. Stack trace for the `on` case from the code above:
stack trace
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Input In [28], in <cell line: 1>()
----> 1 print(pd.merge_asof(left, right, on="a"))
File ~/opt/anaconda3/envs/modin-dev-py38/lib/python3.8/site-packages/pandas/core/reshape/merge.py:583, in merge_asof(left, right, on, left_on, right_on, left_index, right_index, by, left_by, right_by, suffixes, tolerance, allow_exact_matches, direction)
328 def merge_asof(
329 left: DataFrame | Series,
330 right: DataFrame | Series,
(...)
342 direction: str = "backward",
343 ) -> DataFrame:
344 """
345 Perform a merge by key distance.
346
(...)
581 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN
582 """
--> 583 op = _AsOfMerge(
584 left,
585 right,
586 on=on,
587 left_on=left_on,
588 right_on=right_on,
589 left_index=left_index,
590 right_index=right_index,
591 by=by,
592 left_by=left_by,
593 right_by=right_by,
594 suffixes=suffixes,
595 how="asof",
596 tolerance=tolerance,
597 allow_exact_matches=allow_exact_matches,
598 direction=direction,
599 )
600 return op.get_result()
File ~/opt/anaconda3/envs/modin-dev-py38/lib/python3.8/site-packages/pandas/core/reshape/merge.py:1825, in _AsOfMerge.__init__(self, left, right, on, left_on, right_on, left_index, right_index, by, left_by, right_by, axis, suffixes, copy, fill_method, how, tolerance, allow_exact_matches, direction)
1822 self.allow_exact_matches = allow_exact_matches
1823 self.direction = direction
-> 1825 _OrderedMerge.__init__(
1826 self,
1827 left,
1828 right,
1829 on=on,
1830 left_on=left_on,
1831 right_on=right_on,
1832 left_index=left_index,
1833 right_index=right_index,
1834 axis=axis,
1835 how=how,
1836 suffixes=suffixes,
1837 fill_method=fill_method,
1838 )
File ~/opt/anaconda3/envs/modin-dev-py38/lib/python3.8/site-packages/pandas/core/reshape/merge.py:1726, in _OrderedMerge.__init__(self, left, right, on, left_on, right_on, left_index, right_index, axis, suffixes, fill_method, how)
1710 def __init__(
1711 self,
1712 left: DataFrame | Series,
(...)
1722 how: str = "outer",
1723 ) -> None:
1725 self.fill_method = fill_method
-> 1726 _MergeOperation.__init__(
1727 self,
1728 left,
1729 right,
1730 on=on,
1731 left_on=left_on,
1732 left_index=left_index,
1733 right_index=right_index,
1734 right_on=right_on,
1735 axis=axis,
1736 how=how,
1737 suffixes=suffixes,
1738 sort=True, # factorize sorts
1739 )
File ~/opt/anaconda3/envs/modin-dev-py38/lib/python3.8/site-packages/pandas/core/reshape/merge.py:685, in _MergeOperation.__init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, indicator, validate)
681 # stacklevel chosen to be correct when this is reached via pd.merge
682 # (and not DataFrame.join)
683 warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
--> 685 self.left_on, self.right_on = self._validate_left_right_on(left_on, right_on)
687 cross_col = None
688 if self.how == "cross":
File ~/opt/anaconda3/envs/modin-dev-py38/lib/python3.8/site-packages/pandas/core/reshape/merge.py:1873, in _AsOfMerge._validate_left_right_on(self, left_on, right_on)
1870 lo_dtype = left_on_0.dtype
1871 else:
1872 lo_dtype = (
-> 1873 self.left[left_on_0].dtype
1874 if left_on_0 in self.left.columns
1875 else self.left.index.get_level_values(left_on_0)
1876 )
1877 else:
1878 lo_dtype = self.left.index.dtype
File ~/opt/anaconda3/envs/modin-dev-py38/lib/python3.8/site-packages/pandas/core/generic.py:5902, in NDFrame.__getattr__(self, name)
5895 if (
5896 name not in self._internal_names_set
5897 and name not in self._metadata
5898 and name not in self._accessors
5899 and self._info_axis._can_hold_identifiers_and_holds_name(name)
5900 ):
5901 return self[name]
-> 5902 return object.__getattribute__(self, name)
AttributeError: 'DataFrame' object has no attribute 'dtype'
Expected Behavior
I should get something more informative, like the error for a non-unique `by`, which is `MergeError: Data columns not unique: Index(['a'], dtype='object')`.
Installed Versions
INSTALLED VERSIONS
commit : 8dab54d
python : 3.8.13.final.0
python-bits : 64
OS : Darwin
OS-release : 21.5.0
Version : Darwin Kernel Version 21.5.0: Tue Apr 26 21:08:22 PDT 2022; root:xnu-8020.121.3~4/RELEASE_X86_64
machine : x86_64
processor : i386
byteorder : little
LC_ALL : None
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8
pandas : 1.5.2
numpy : 1.23.1
pytz : 2022.1
dateutil : 2.8.2
setuptools : 61.2.0
pip : 22.1.2
Cython : None
pytest : 7.1.2
hypothesis : None
sphinx : 5.1.1
blosc : None
feather : 0.4.1
xlsxwriter : None
lxml.etree : 4.9.1
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 3.1.2
IPython : 8.4.0
pandas_datareader: None
bs4 : 4.11.1
bottleneck : None
brotli : None
fastparquet : 2022.11.0
fsspec : 2022.7.1
gcsfs : None
matplotlib : 3.5.2
numba : None
numexpr : 2.8.3
odfpy : None
openpyxl : 3.0.10
pandas_gbq : 0.17.7
pyarrow : 8.0.0
pyreadstat : None
pyxlsb : None
s3fs : 2022.7.1
scipy : 1.9.0
snappy : None
sqlalchemy : 1.4.39
tables : 3.7.0
tabulate : 0.9.0
xarray : 2022.6.0
xlrd : 2.0.1
xlwt : None
zstandard : None
tzdata : None