Skip to content

BUG: UnboundLocalError when full outer merging two dataframes #59422

Closed
@paulochf

Description

@paulochf

Pandas version checks

  • I have checked that this issue has not already been reported.

  • I have confirmed this bug exists on the latest version of pandas.

  • I have confirmed this bug exists on the main branch of pandas.

Reproducible Example

from datetime import datetime

data_A = list()
data_B = list()

for i in range(10):
    data_A.append({
        "id": i,
        "created_date": datetime.today(),
        "created_at": datetime.now(),
    })

    data_B.append({
        "id": i if i % 2 == 0 else 3*i,
        "created_date": datetime.today(),
        "created_at": datetime.now(),
    })

df_A = pd.DataFrame.from_dict(data_A)

df_B = pd.DataFrame.from_dict(data_B)

df_A.merge(df_B, how="full", on="id")
---------------------------------------------------------------------------
UnboundLocalError                         Traceback (most recent call last)
Cell In[59], line 23
     19 df_A = pd.DataFrame.from_dict(data_A)
     21 df_B = pd.DataFrame.from_dict(data_B)
---> 23 df_A.merge(df_B, how="full", on="id")

File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/frame.py:10832, in DataFrame.merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
  10813 @Substitution("")
  10814 @Appender(_merge_doc, indents=2)
  10815 def merge(
   (...)
  10828     validate: MergeValidate | None = None,
  10829 ) -> DataFrame:
  10830     from pandas.core.reshape.merge import merge
> 10832     return merge(
  10833         self,
  10834         right,
  10835         how=how,
  10836         on=on,
  10837         left_on=left_on,
  10838         right_on=right_on,
  10839         left_index=left_index,
  10840         right_index=right_index,
  10841         sort=sort,
  10842         suffixes=suffixes,
  10843         copy=copy,
  10844         indicator=indicator,
  10845         validate=validate,
  10846     )

File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:184, in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
    169 else:
    170     op = _MergeOperation(
    171         left_df,
    172         right_df,
   (...)
    182         validate=validate,
    183     )
--> 184     return op.get_result(copy=copy)

File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:886, in _MergeOperation.get_result(self, copy)
    883 if self.indicator:
    884     self.left, self.right = self._indicator_pre_merge(self.left, self.right)
--> 886 join_index, left_indexer, right_indexer = self._get_join_info()
    888 result = self._reindex_and_concat(
    889     join_index, left_indexer, right_indexer, copy=copy
    890 )
    891 result = result.__finalize__(self, method=self._merge_type)

File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:1151, in _MergeOperation._get_join_info(self)
   1147     join_index, right_indexer, left_indexer = _left_join_on_index(
   1148         right_ax, left_ax, self.right_join_keys, sort=self.sort
   1149     )
   1150 else:
-> 1151     (left_indexer, right_indexer) = self._get_join_indexers()
   1153     if self.right_index:
   1154         if len(self.left) > 0:

File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:1125, in _MergeOperation._get_join_indexers(self)
   1123 # make mypy happy
   1124 assert self.how != "asof"
-> 1125 return get_join_indexers(
   1126     self.left_join_keys, self.right_join_keys, sort=self.sort, how=self.how
   1127 )

File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:1759, in get_join_indexers(left_keys, right_keys, sort, how)
   1757     _, lidx, ridx = left.join(right, how=how, return_indexers=True, sort=sort)
   1758 else:
-> 1759     lidx, ridx = get_join_indexers_non_unique(
   1760         left._values, right._values, sort, how
   1761     )
   1763 if lidx is not None and is_range_indexer(lidx, len(left)):
   1764     lidx = None

File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:1802, in get_join_indexers_non_unique(left, right, sort, how)
   1800 elif how == "outer":
   1801     lidx, ridx = libjoin.full_outer_join(lkey, rkey, count)
-> 1802 return lidx, ridx

UnboundLocalError: cannot access local variable 'lidx' where it is not associated with a value

Issue Description

I attempted a full outer join between two DataFrames using `how="full"`, and it raised an UnboundLocalError.

Expected Behavior

A merged DataFrame combining the two DataFrames, where half of the rows would match and the other half wouldn't.

Installed Versions

INSTALLED VERSIONS

commit : d9cdd2e
python : 3.12.4.final.0
python-bits : 64
OS : Linux
OS-release : 6.5.0-45-generic
Version : #45~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Mon Jul 15 16:40:02 UTC 2
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8

pandas : 2.2.2
numpy : 2.0.1
pytz : 2024.1
dateutil : 2.9.0.post0
setuptools : 71.1.0
pip : 24.2
Cython : None
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : 5.2.2
html5lib : None
pymysql : None
psycopg2 : 2.9.9
jinja2 : 3.1.4
IPython : 8.26.0
pandas_datareader : None
adbc-driver-postgresql: None
adbc-driver-sqlite : None
bs4 : 4.12.3
bottleneck : None
dataframe-api-compat : None
fastparquet : None
fsspec : None
gcsfs : None
matplotlib : 3.9.1
numba : None
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pyreadstat : None
python-calamine : None
pyxlsb : None
s3fs : None
scipy : None
sqlalchemy : 2.0.31
tables : None
tabulate : None
xarray : None
xlrd : None
zstandard : None
tzdata : 2024.1
qtpy : None
pyqt5 : None

Metadata

Metadata

Assignees

Labels

Enhancement · Error Reporting (Incorrect or improved errors from pandas) · Reshaping (Concat, Merge/Join, Stack/Unstack, Explode)

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions