diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index c6b5816d12061..5d9732deeae4a 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -447,6 +447,7 @@ Performance improvements - Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47405`, :issue:`47656`, :issue:`48502`) - Memory improvement in :meth:`RangeIndex.sort_values` (:issue:`48801`) - Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``sort=False`` (:issue:`48976`) +- Performance improvement in :func:`merge` when not merging on the index - the new index will now be :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49478`) .. --------------------------------------------------------------------------- .. _whatsnew_200.bug_fixes: diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index cea9aaf70ccd0..e4e20ef98224c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -95,6 +95,7 @@ Index, MultiIndex, all_indexes_same, + default_index, ) from pandas.core.indexes.category import CategoricalIndex from pandas.core.series import Series @@ -1159,7 +1160,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) if not self.as_index: self._insert_inaxis_grouper_inplace(result) - result.index = Index(range(len(result))) + result.index = default_index(len(result)) return result @@ -1778,7 +1779,7 @@ def nunique(self, dropna: bool = True) -> DataFrame: ) if not self.as_index: - results.index = Index(range(len(results))) + results.index = default_index(len(results)) self._insert_inaxis_grouper_inplace(results) return results diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index a39e3c1f10956..ec077caeef69e 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -21,7 +21,10 @@ from pandas.core.arrays import SparseArray from pandas.core.arrays.categorical import factorize_from_iterable from pandas.core.frame import DataFrame -from pandas.core.indexes.api import Index +from pandas.core.indexes.api import ( + Index, + default_index, +) from pandas.core.series import Series @@ -249,7 +252,7 @@ def get_empty_frame(data) -> DataFrame: if isinstance(data, Series): index = data.index else: - index = Index(range(len(data))) + index = default_index(len(data)) return DataFrame(index=index) # if all NaN diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 3f98ab16c6797..2600dbf249e30 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -84,6 +84,7 @@ import pandas.core.common as com from pandas.core.construction import extract_array from pandas.core.frame import _merge_doc +from pandas.core.indexes.api import default_index from pandas.core.sorting import is_int64_overflow_possible if TYPE_CHECKING: @@ -1060,7 +1061,7 @@ def _get_join_info( else: join_index = self.left.index.take(left_indexer) else: - join_index = Index(np.arange(len(left_indexer))) + join_index = default_index(len(left_indexer)) if len(join_index) == 0: join_index = join_index.astype(object)