|
4 | 4 |
|
5 | 5 | import copy
|
6 | 6 | import datetime
|
| 7 | +import hashlib |
7 | 8 | from functools import partial
|
8 | 9 | import string
|
9 | 10 | from typing import TYPE_CHECKING, Optional, Tuple, cast
|
@@ -591,8 +592,6 @@ def __init__(
|
591 | 592 | ):
|
592 | 593 | _left = _validate_operand(left)
|
593 | 594 | _right = _validate_operand(right)
|
594 |
| - if how == "cross": |
595 |
| - _left, _right, how, on = self._create_cross_configuration(_left, _right, on) |
596 | 595 | self.left = self.orig_left = _left
|
597 | 596 | self.right = self.orig_right = _right
|
598 | 597 | self.how = how
|
@@ -645,6 +644,16 @@ def __init__(
|
645 | 644 |
|
646 | 645 | self._validate_specification()
|
647 | 646 |
|
| 647 | + if self.how == "cross": |
| 648 | + ( |
| 649 | + self.left, |
| 650 | + self.right, |
| 651 | + self.how, |
| 652 | + cross_col, |
| 653 | + ) = self._create_cross_configuration(self.left, self.right) |
| 654 | + self.left_on = self.right_on = [cross_col] |
| 655 | + self._cross = cross_col |
| 656 | + |
648 | 657 | # note this function has side effects
|
649 | 658 | (
|
650 | 659 | self.left_join_keys,
|
@@ -1210,35 +1219,31 @@ def _maybe_coerce_merge_keys(self):
|
1210 | 1219 | self.right = self.right.assign(**{name: self.right[name].astype(typ)})
|
1211 | 1220 |
|
1212 | 1221 | def _create_cross_configuration(
|
1213 |
| - self, _left, _right, on |
| 1222 | + self, _left, _right |
1214 | 1223 | ) -> Tuple["DataFrame", "DataFrame", str, str]:
|
1215 |
| - if on is not None: |
1216 |
| - raise MergeError( |
1217 |
| - "Can not pass any merge columns when using cross as merge method" |
1218 |
| - ) |
1219 |
| - cross_col = f"{max([*_left.columns, *_right.columns])}_cross" |
1220 |
| - _left = _left.copy() |
1221 |
| - _right = _right.copy() |
1222 |
| - _left.insert(loc=0, value=1, column=cross_col) |
1223 |
| - _right.insert(loc=0, value=1, column=cross_col) |
| 1224 | + cross_col = f"_cross_{hashlib.md5().hexdigest()}" |
1224 | 1225 | how = "inner"
|
1225 |
| - on = cross_col |
1226 |
| - self._cross = cross_col |
1227 |
| - return _left, _right, how, on |
| 1226 | + return ( |
| 1227 | + _left.assign(**{cross_col: 1}), |
| 1228 | + _right.assign(**{cross_col: 1}), |
| 1229 | + how, |
| 1230 | + cross_col, |
| 1231 | + ) |
1228 | 1232 |
|
1229 | 1233 | def _validate_specification(self):
|
1230 |
| - if hasattr(self, "_cross"): |
| 1234 | + if self.how == "cross": |
1231 | 1235 | if (
|
1232 | 1236 | self.left_index
|
1233 | 1237 | or self.right_index
|
1234 | 1238 | or self.right_on is not None
|
1235 | 1239 | or self.left_on is not None
|
| 1240 | + or self.on is not None |
1236 | 1241 | ):
|
1237 | 1242 | raise MergeError(
|
1238 | 1243 | "Can not pass any merge columns when using cross as merge method"
|
1239 | 1244 | )
|
1240 | 1245 | # Hm, any way to make this logic less complicated??
|
1241 |
| - if self.on is None and self.left_on is None and self.right_on is None: |
| 1246 | + elif self.on is None and self.left_on is None and self.right_on is None: |
1242 | 1247 |
|
1243 | 1248 | if self.left_index and self.right_index:
|
1244 | 1249 | self.left_on, self.right_on = (), ()
|
@@ -1302,7 +1307,7 @@ def _validate_specification(self):
|
1302 | 1307 | 'of levels in the index of "left"'
|
1303 | 1308 | )
|
1304 | 1309 | self.left_on = [None] * n
|
1305 |
| - if len(self.right_on) != len(self.left_on): |
| 1310 | + if self.how != "cross" and len(self.right_on) != len(self.left_on): |
1306 | 1311 | raise ValueError("len(right_on) must equal len(left_on)")
|
1307 | 1312 |
|
1308 | 1313 | def _validate(self, validate: str):
|
|
0 commit comments