From 5287dfa4a4403d159b22659e3cca769c2db4d7c1 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 21 Jun 2023 08:18:34 -0700 Subject: [PATCH 1/2] PERF: avoid unnecessary casting in merge --- pandas/core/reshape/merge.py | 38 ++++++++---------------------------- 1 file changed, 8 insertions(+), 30 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index cbcdc3813c24d..07bde9a8a4b21 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -30,7 +30,6 @@ AnyArrayLike, ArrayLike, AxisInt, - DtypeObj, IndexLabel, JoinHow, MergeHow, @@ -49,7 +48,6 @@ from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import ( - ensure_float64, ensure_int64, ensure_object, is_bool, @@ -1840,23 +1838,6 @@ def _asof_by_function(direction: str): return getattr(libjoin, name, None) -_type_casters = { - "int64_t": ensure_int64, - "double": ensure_float64, - "object": ensure_object, -} - - -def _get_cython_type_upcast(dtype: DtypeObj) -> str: - """Upcast a dtype to 'int64_t', 'double', or 'object'""" - if is_integer_dtype(dtype): - return "int64_t" - elif is_float_dtype(dtype): - return "double" - else: - return "object" - - class _AsOfMerge(_OrderedMerge): _merge_type = "asof_merge" @@ -2163,23 +2144,20 @@ def injection(obj: AnyArrayLike): if len(left_by_values) == 1: lbv = left_by_values[0] rbv = right_by_values[0] + + # TODO: conversions for EAs that can be no-copy. + lbv = np.asarray(lbv) + rbv = np.asarray(rbv) else: # We get here with non-ndarrays in test_merge_by_col_tz_aware # and test_merge_groupby_multiple_column_with_categorical_column lbv = flip(left_by_values) rbv = flip(right_by_values) + lbv = ensure_object(lbv) + rbv = ensure_object(rbv) - # upcast 'by' parameter because HashTable is limited - by_type = _get_cython_type_upcast(lbv.dtype) - by_type_caster = _type_casters[by_type] - # error: Incompatible types in assignment (expression has type - # "ndarray[Any, dtype[generic]]", variable has type - # "List[Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series]]") - left_by_values = by_type_caster(lbv) # type: ignore[assignment] - # error: Incompatible types in assignment (expression has type - # "ndarray[Any, dtype[generic]]", variable has type - # "List[Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series]]") - right_by_values = by_type_caster(rbv) # type: ignore[assignment] + right_by_values = rbv + left_by_values = lbv # choose appropriate function by type func = _asof_by_function(self.direction) From 1f8f6e6625b47925329ab0b1c9d2cc96a5875dc2 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 21 Jun 2023 10:07:09 -0700 Subject: [PATCH 2/2] mypy fixup --- pandas/core/reshape/merge.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 07bde9a8a4b21..a09357e70d06b 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2156,8 +2156,16 @@ def injection(obj: AnyArrayLike): lbv = ensure_object(lbv) rbv = ensure_object(rbv) - right_by_values = rbv - left_by_values = lbv + # error: Incompatible types in assignment (expression has type + # "Union[ndarray[Any, dtype[Any]], ndarray[Any, dtype[object_]]]", + # variable has type "List[Union[Union[ExtensionArray, + # ndarray[Any, Any]], Index, Series]]") + right_by_values = rbv # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type + # "Union[ndarray[Any, dtype[Any]], ndarray[Any, dtype[object_]]]", + # variable has type "List[Union[Union[ExtensionArray, + # ndarray[Any, Any]], Index, Series]]") + left_by_values = lbv # type: ignore[assignment] # choose appropriate function by type func = _asof_by_function(self.direction)