Description
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
# Minimal reproduction: build two small frames sharing an "id" column and
# merge them with how="full", which raises the UnboundLocalError reported below.
from datetime import datetime

import pandas as pd

data_A = list()
data_B = list()
for i in range(10):
    data_A.append({
        "id": i,
        "created_date": datetime.today(),
        "created_at": datetime.now(),
    })
    data_B.append({
        # Even ids are kept as-is; odd ids are tripled (3, 9, 15, 21, 27),
        # so only part of data_B's ids also occur in data_A.
        "id": i if i % 2 == 0 else 3*i,
        "created_date": datetime.today(),
        "created_at": datetime.now(),
    })
df_A = pd.DataFrame.from_dict(data_A)
df_B = pd.DataFrame.from_dict(data_B)
# NOTE(review): how="full" is intentionally left unchanged -- it is the value
# that triggers the failure. The traceback only shows a branch for "outer";
# presumably "outer" is the supported spelling for a full outer join --
# confirm against the DataFrame.merge documentation.
df_A.merge(df_B, how="full", on="id")
---------------------------------------------------------------------------
UnboundLocalError Traceback (most recent call last)
Cell In[59], line 23
19 df_A = pd.DataFrame.from_dict(data_A)
21 df_B = pd.DataFrame.from_dict(data_B)
---> 23 df_A.merge(df_B, how="full", on="id")
File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/frame.py:10832, in DataFrame.merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
10813 @Substitution("")
10814 @Appender(_merge_doc, indents=2)
10815 def merge(
(...)
10828 validate: MergeValidate | None = None,
10829 ) -> DataFrame:
10830 from pandas.core.reshape.merge import merge
> 10832 return merge(
10833 self,
10834 right,
10835 how=how,
10836 on=on,
10837 left_on=left_on,
10838 right_on=right_on,
10839 left_index=left_index,
10840 right_index=right_index,
10841 sort=sort,
10842 suffixes=suffixes,
10843 copy=copy,
10844 indicator=indicator,
10845 validate=validate,
10846 )
File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:184, in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
169 else:
170 op = _MergeOperation(
171 left_df,
172 right_df,
(...)
182 validate=validate,
183 )
--> 184 return op.get_result(copy=copy)
File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:886, in _MergeOperation.get_result(self, copy)
883 if self.indicator:
884 self.left, self.right = self._indicator_pre_merge(self.left, self.right)
--> 886 join_index, left_indexer, right_indexer = self._get_join_info()
888 result = self._reindex_and_concat(
889 join_index, left_indexer, right_indexer, copy=copy
890 )
891 result = result.__finalize__(self, method=self._merge_type)
File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:1151, in _MergeOperation._get_join_info(self)
1147 join_index, right_indexer, left_indexer = _left_join_on_index(
1148 right_ax, left_ax, self.right_join_keys, sort=self.sort
1149 )
1150 else:
-> 1151 (left_indexer, right_indexer) = self._get_join_indexers()
1153 if self.right_index:
1154 if len(self.left) > 0:
File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:1125, in _MergeOperation._get_join_indexers(self)
1123 # make mypy happy
1124 assert self.how != "asof"
-> 1125 return get_join_indexers(
1126 self.left_join_keys, self.right_join_keys, sort=self.sort, how=self.how
1127 )
File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:1759, in get_join_indexers(left_keys, right_keys, sort, how)
1757 _, lidx, ridx = left.join(right, how=how, return_indexers=True, sort=sort)
1758 else:
-> 1759 lidx, ridx = get_join_indexers_non_unique(
1760 left._values, right._values, sort, how
1761 )
1763 if lidx is not None and is_range_indexer(lidx, len(left)):
1764 lidx = None
File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:1802, in get_join_indexers_non_unique(left, right, sort, how)
1800 elif how == "outer":
1801 lidx, ridx = libjoin.full_outer_join(lkey, rkey, count)
-> 1802 return lidx, ridx
UnboundLocalError: cannot access local variable 'lidx' where it is not associated with a value
Issue Description
I attempted a full outer join between two DataFrames by calling `merge` with `how="full"`, and it resulted in an `UnboundLocalError`.
Expected Behavior
A single DataFrame joining the two DataFrames, in which half of the rows match and the other half do not.
Installed Versions
INSTALLED VERSIONS
commit : d9cdd2e
python : 3.12.4.final.0
python-bits : 64
OS : Linux
OS-release : 6.5.0-45-generic
Version : #45~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Mon Jul 15 16:40:02 UTC 2
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8
pandas : 2.2.2
numpy : 2.0.1
pytz : 2024.1
dateutil : 2.9.0.post0
setuptools : 71.1.0
pip : 24.2
Cython : None
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : 5.2.2
html5lib : None
pymysql : None
psycopg2 : 2.9.9
jinja2 : 3.1.4
IPython : 8.26.0
pandas_datareader : None
adbc-driver-postgresql: None
adbc-driver-sqlite : None
bs4 : 4.12.3
bottleneck : None
dataframe-api-compat : None
fastparquet : None
fsspec : None
gcsfs : None
matplotlib : 3.9.1
numba : None
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pyreadstat : None
python-calamine : None
pyxlsb : None
s3fs : None
scipy : None
sqlalchemy : 2.0.31
tables : None
tabulate : None
xarray : None
xlrd : None
zstandard : None
tzdata : 2024.1
qtpy : None
pyqt5 : None