diff --git a/pandas/_typing.py b/pandas/_typing.py index dad5ffd48caa8..05421f6bd8126 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -343,6 +343,12 @@ def closed(self) -> bool: # dropna AnyAll = Literal["any", "all"] +# merge +MergeHow = Literal["left", "right", "inner", "outer", "cross"] + +# join +JoinHow = Literal["left", "right", "inner", "outer"] + MatplotlibColor = Union[str, Sequence[float]] TimeGrouperOrigin = Union[ "Timestamp", Literal["epoch", "start", "start_day", "end", "end_day"] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5654ba469d05a..c1024b86b4df2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -71,6 +71,7 @@ IndexKeyFunc, IndexLabel, Level, + MergeHow, NaPosition, PythonFuncType, QuantileInterpolation, @@ -9528,7 +9529,7 @@ def join( self, other: DataFrame | Series | list[DataFrame | Series], on: IndexLabel | None = None, - how: str = "left", + how: MergeHow = "left", lsuffix: str = "", rsuffix: str = "", sort: bool = False, @@ -9701,7 +9702,7 @@ def _join_compat( self, other: DataFrame | Series | Iterable[DataFrame | Series], on: IndexLabel | None = None, - how: str = "left", + how: MergeHow = "left", lsuffix: str = "", rsuffix: str = "", sort: bool = False, @@ -9787,7 +9788,7 @@ def _join_compat( def merge( self, right: DataFrame | Series, - how: str = "inner", + how: MergeHow = "inner", on: IndexLabel | None = None, left_on: IndexLabel | None = None, right_on: IndexLabel | None = None, diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 068ff7a0bf1c9..b4c3af17ecce0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -53,6 +53,7 @@ F, IgnoreRaise, IndexLabel, + JoinHow, Level, Shape, npt, @@ -218,7 +219,7 @@ def join( self, other: Index, *, - how: str_t = "left", + how: JoinHow = "left", level=None, return_indexers: bool = False, sort: bool = False, @@ -4325,7 +4326,7 @@ def join( self, other: Index, *, - how: str_t = ..., + how: JoinHow = ..., level: Level = ..., return_indexers: Literal[True], sort: bool = ..., @@ -4337,7 +4338,7 @@ def join( self, other: Index, *, - how: str_t = ..., + how: JoinHow = ..., level: Level = ..., return_indexers: Literal[False] = ..., sort: bool = ..., @@ -4349,7 +4350,7 @@ def join( self, other: Index, *, - how: str_t = ..., + how: JoinHow = ..., level: Level = ..., return_indexers: bool = ..., sort: bool = ..., @@ -4362,7 +4363,7 @@ def join( self, other: Index, *, - how: str_t = "left", + how: JoinHow = "left", level: Level = None, return_indexers: bool = False, sort: bool = False, @@ -4437,7 +4438,8 @@ def join( return join_index, None, rindexer if self._join_precedence < other._join_precedence: - how = {"right": "left", "left": "right"}.get(how, how) + flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"} + how = flip.get(how, how) join_index, lidx, ridx = other.join( self, how=how, level=level, return_indexers=True ) @@ -4483,7 +4485,7 @@ def join( @final def _join_via_get_indexer( - self, other: Index, how: str_t, sort: bool + self, other: Index, how: JoinHow, sort: bool ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: # Fallback if we do not have any fastpaths available based on # uniqueness/monotonicity @@ -4517,7 +4519,7 @@ def _join_via_get_indexer( return join_index, lindexer, rindexer @final - def _join_multi(self, other: Index, how: str_t): + def _join_multi(self, other: Index, how: JoinHow): from pandas.core.indexes.multi import MultiIndex from pandas.core.reshape.merge import restore_dropped_levels_multijoin @@ -4589,7 +4591,8 @@ def _join_multi(self, other: Index, how: str_t): self, other = other, self flip_order = True # flip if join method is right or left - how = {"right": "left", "left": "right"}.get(how, how) + flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"} + how = flip.get(how, how) level = other.names.index(jl) result = self._join_level(other, level, how=how) @@ -4600,7 +4603,7 @@ def _join_multi(self, other: Index, how: str_t): @final def _join_non_unique( - self, other: Index, how: str_t = "left" + self, other: Index, how: JoinHow = "left" ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp]]: from pandas.core.reshape.merge import get_join_indexers @@ -4632,7 +4635,7 @@ def _join_non_unique( @final def _join_level( - self, other: Index, level, how: str_t = "left", keep_order: bool = True + self, other: Index, level, how: JoinHow = "left", keep_order: bool = True ) -> tuple[MultiIndex, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: """ The join method *only* affects the level of the resulting @@ -4683,7 +4686,8 @@ def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]: flip_order = not isinstance(self, MultiIndex) if flip_order: left, right = right, left - how = {"right": "left", "left": "right"}.get(how, how) + flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"} + how = flip.get(how, how) assert isinstance(left, MultiIndex) @@ -4780,7 +4784,7 @@ def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]: @final def _join_monotonic( - self, other: Index, how: str_t = "left" + self, other: Index, how: JoinHow = "left" ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: # We only get here with matching dtypes and both monotonic increasing assert other.dtype == self.dtype diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 74a1051825820..6299978e2dcfe 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -30,6 +30,9 @@ AxisInt, DtypeObj, IndexLabel, + JoinHow, + Literal, + MergeHow, Shape, Suffixes, npt, @@ -98,7 +101,7 @@ def merge( left: DataFrame | Series, right: DataFrame | Series, - how: str = "inner", + how: MergeHow = "inner", on: IndexLabel | None = None, left_on: IndexLabel | None = None, right_on: IndexLabel | None = None, @@ -197,7 +200,7 @@ def merge_ordered( right_by=None, fill_method: str | None = None, suffixes: Suffixes = ("_x", "_y"), - how: str = "outer", + how: JoinHow = "outer", ) -> DataFrame: """ Perform a merge for ordered data with optional filling/interpolation. @@ -612,7 +615,7 @@ class _MergeOperation: """ _merge_type = "merge" - how: str + how: MergeHow | Literal["asof"] on: IndexLabel | None # left_on/right_on may be None when passed, but in validate_specification # get replaced with non-None. @@ -635,7 +638,7 @@ def __init__( self, left: DataFrame | Series, right: DataFrame | Series, - how: str = "inner", + how: MergeHow | Literal["asof"] = "inner", on: IndexLabel | None = None, left_on: IndexLabel | None = None, right_on: IndexLabel | None = None, @@ -1010,7 +1013,8 @@ def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp] def _get_join_info( self, ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: - + # make mypy happy + assert self.how != "cross" left_ax = self.left.axes[self.axis] right_ax = self.right.axes[self.axis] @@ -1072,7 +1076,7 @@ def _create_join_index( index: Index, other_index: Index, indexer: npt.NDArray[np.intp], - how: str = "left", + how: JoinHow = "left", ) -> Index: """ Create a join index by rearranging one index to match another @@ -1406,7 +1410,7 @@ def _maybe_coerce_merge_keys(self) -> None: def _create_cross_configuration( self, left: DataFrame, right: DataFrame - ) -> tuple[DataFrame, DataFrame, str, str]: + ) -> tuple[DataFrame, DataFrame, JoinHow, str]: """ Creates the configuration to dispatch the cross operation to inner join, e.g. adding a join column and resetting parameters. Join column is added @@ -1424,7 +1428,7 @@ def _create_cross_configuration( to join over. """ cross_col = f"_cross_{uuid.uuid4()}" - how = "inner" + how: JoinHow = "inner" return ( left.assign(**{cross_col: 1}), right.assign(**{cross_col: 1}), @@ -1584,7 +1588,11 @@ def _validate(self, validate: str) -> None: def get_join_indexers( - left_keys, right_keys, sort: bool = False, how: str = "inner", **kwargs + left_keys, + right_keys, + sort: bool = False, + how: MergeHow | Literal["asof"] = "inner", + **kwargs, ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: """ @@ -1757,7 +1765,7 @@ def __init__( axis: AxisInt = 1, suffixes: Suffixes = ("_x", "_y"), fill_method: str | None = None, - how: str = "outer", + how: JoinHow | Literal["asof"] = "outer", ) -> None: self.fill_method = fill_method @@ -1847,7 +1855,7 @@ def __init__( suffixes: Suffixes = ("_x", "_y"), copy: bool = True, fill_method: str | None = None, - how: str = "asof", + how: Literal["asof"] = "asof", tolerance=None, allow_exact_matches: bool = True, direction: str = "backward", @@ -2256,7 +2264,10 @@ def _left_join_on_index( def _factorize_keys( - lk: ArrayLike, rk: ArrayLike, sort: bool = True, how: str = "inner" + lk: ArrayLike, + rk: ArrayLike, + sort: bool = True, + how: MergeHow | Literal["asof"] = "inner", ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]: """ Encode left and right keys as enumerated types.