From 09af3f7c966fe7937e1e5f3e888e9ff0ed289e14 Mon Sep 17 00:00:00 2001 From: bang128 <71242233+bang128@users.noreply.github.com> Date: Mon, 21 Nov 2022 23:22:00 -0800 Subject: [PATCH 1/3] Fix concat.py --- pandas/core/internals/concat.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 0592db8ad608d..cfb049b6cff58 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -69,7 +69,7 @@ def _concatenate_array_managers( - mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy: bool + mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy_b: bool ) -> Manager: """ Concatenate array managers into one. @@ -95,7 +95,7 @@ def _concatenate_array_managers( ) if ax == 1 and indexer is not None: axis1_made_copy = True - if copy and concat_axis == 0 and not axis1_made_copy: + if copy_b and concat_axis == 0 and not axis1_made_copy: # for concat_axis 1 we will always get a copy through concat_arrays mgr = mgr.copy() mgrs.append(mgr) @@ -151,7 +151,7 @@ def concat_arrays(to_concat: list) -> ArrayLike: to_concat = [ arr.to_array(target_dtype) if isinstance(arr, NullArrayProxy) - else astype_array(arr, target_dtype, copy=False) + else astype_array(arr, target_dtype, copy_b=False) for arr in to_concat ] @@ -173,7 +173,7 @@ def concat_arrays(to_concat: list) -> ArrayLike: def concatenate_managers( - mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy: bool + mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy_b: bool ) -> Manager: """ Concatenate block managers into one. @@ -191,7 +191,7 @@ def concatenate_managers( """ # TODO(ArrayManager) this assumes that all managers are of the same type if isinstance(mgrs_indexers[0][0], ArrayManager): - return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy) + return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy_b) mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers) @@ -207,7 +207,7 @@ def concatenate_managers( if len(join_units) == 1 and not join_units[0].indexers: values = blk.values - if copy: + if copy_b: values = values.copy() else: values = values.view() @@ -229,7 +229,7 @@ def concatenate_managers( fastpath = blk.values.dtype == values.dtype else: - values = _concatenate_join_units(join_units, concat_axis, copy=copy) + values = _concatenate_join_units(join_units, concat_axis, copy_b=copy_b) fastpath = False if fastpath: @@ -260,7 +260,7 @@ def _maybe_reindex_columns_na_proxy( axes[0], indexers[0], axis=0, - copy=False, + copy_b=False, only_slice=True, allow_dups=True, use_na_proxy=True, @@ -524,7 +524,7 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: def _concatenate_join_units( - join_units: list[JoinUnit], concat_axis: AxisInt, copy: bool + join_units: list[JoinUnit], concat_axis: AxisInt, copy_b: bool ) -> ArrayLike: """ Concatenate values from several join units along selected axis. @@ -546,7 +546,7 @@ def _concatenate_join_units( if len(to_concat) == 1: # Only one block, nothing to concatenate. concat_values = to_concat[0] - if copy: + if copy_b: if isinstance(concat_values, np.ndarray): # non-reindexed (=not yet copied) arrays are made into a view # in JoinUnit.get_reindexed_values From 580d7758ac39f8f28dd0c88284d792a1e5b7cab0 Mon Sep 17 00:00:00 2001 From: bang128 <71242233+bang128@users.noreply.github.com> Date: Tue, 22 Nov 2022 16:28:49 -0800 Subject: [PATCH 2/3] #49656: update concat.py, fix merge.py --- pandas/core/internals/concat.py | 20 ++++++++++---------- pandas/core/reshape/merge.py | 32 ++++++++++++++++---------------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index cfb049b6cff58..349b7fc94d34d 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -69,7 +69,7 @@ def _concatenate_array_managers( - mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy_b: bool + mgrs_indexers, axes: list[Index], concat_axis: AxisInt, is_copied: bool ) -> Manager: """ Concatenate array managers into one. @@ -95,7 +95,7 @@ def _concatenate_array_managers( ) if ax == 1 and indexer is not None: axis1_made_copy = True - if copy_b and concat_axis == 0 and not axis1_made_copy: + if is_copied and concat_axis == 0 and not axis1_made_copy: # for concat_axis 1 we will always get a copy through concat_arrays mgr = mgr.copy() mgrs.append(mgr) @@ -151,7 +151,7 @@ def concat_arrays(to_concat: list) -> ArrayLike: to_concat = [ arr.to_array(target_dtype) if isinstance(arr, NullArrayProxy) - else astype_array(arr, target_dtype, copy_b=False) + else astype_array(arr, target_dtype, is_copied=False) for arr in to_concat ] @@ -173,7 +173,7 @@ def concat_arrays(to_concat: list) -> ArrayLike: def concatenate_managers( - mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy_b: bool + mgrs_indexers, axes: list[Index], concat_axis: AxisInt, is_copied: bool ) -> Manager: """ Concatenate block managers into one. @@ -191,7 +191,7 @@ def concatenate_managers( """ # TODO(ArrayManager) this assumes that all managers are of the same type if isinstance(mgrs_indexers[0][0], ArrayManager): - return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy_b) + return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, is_copied) mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers) @@ -207,7 +207,7 @@ def concatenate_managers( if len(join_units) == 1 and not join_units[0].indexers: values = blk.values - if copy_b: + if is_copied: values = values.copy() else: values = values.view() @@ -229,7 +229,7 @@ def concatenate_managers( fastpath = blk.values.dtype == values.dtype else: - values = _concatenate_join_units(join_units, concat_axis, copy_b=copy_b) + values = _concatenate_join_units(join_units, concat_axis, is_copied=is_copied) fastpath = False if fastpath: @@ -260,7 +260,7 @@ def _maybe_reindex_columns_na_proxy( axes[0], indexers[0], axis=0, - copy_b=False, + is_copied=False, only_slice=True, allow_dups=True, use_na_proxy=True, @@ -524,7 +524,7 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: def _concatenate_join_units( - join_units: list[JoinUnit], concat_axis: AxisInt, copy_b: bool + join_units: list[JoinUnit], concat_axis: AxisInt, is_copied: bool ) -> ArrayLike: """ Concatenate values from several join units along selected axis. @@ -546,7 +546,7 @@ def _concatenate_join_units( if len(to_concat) == 1: # Only one block, nothing to concatenate. concat_values = to_concat[0] - if copy_b: + if is_copied: if isinstance(concat_values, np.ndarray): # non-reindexed (=not yet copied) arrays are made into a view # in JoinUnit.get_reindexed_values diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index cc9a7b7f8d40b..74f5c8c3a3425 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -106,7 +106,7 @@ def merge( right_index: bool = False, sort: bool = False, suffixes: Suffixes = ("_x", "_y"), - copy: bool = True, + is_copied: bool = True, indicator: str | bool = False, validate: str | None = None, ) -> DataFrame: @@ -124,7 +124,7 @@ def merge( indicator=indicator, validate=validate, ) - return op.get_result(copy=copy) + return op.get_result(is_copied=is_copied) if __debug__: @@ -183,7 +183,7 @@ def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces): from pandas.core.reshape.concat import concat result = concat(pieces, ignore_index=True) - result = result.reindex(columns=pieces[0].columns, copy=False) + result = result.reindex(columns=pieces[0].columns, is_copied=False) return result, lby @@ -624,7 +624,7 @@ class _MergeOperation: bm_axis: AxisInt sort: bool suffixes: Suffixes - copy: bool + is_copied: bool indicator: str | bool validate: str | None join_names: list[Hashable] @@ -721,7 +721,7 @@ def _reindex_and_concat( join_index: Index, left_indexer: npt.NDArray[np.intp] | None, right_indexer: npt.NDArray[np.intp] | None, - copy: bool, + is_copied: bool, ) -> DataFrame: """ reindex along index and concat along columns. @@ -742,7 +742,7 @@ def _reindex_and_concat( join_index, left_indexer, axis=1, - copy=False, + is_copied=False, only_slice=True, allow_dups=True, use_na_proxy=True, @@ -755,7 +755,7 @@ def _reindex_and_concat( join_index, right_indexer, axis=1, - copy=False, + is_copied=False, only_slice=True, allow_dups=True, use_na_proxy=True, @@ -767,17 +767,17 @@ def _reindex_and_concat( left.columns = llabels right.columns = rlabels - result = concat([left, right], axis=1, copy=copy) + result = concat([left, right], axis=1, is_copied=is_copied) return result - def get_result(self, copy: bool = True) -> DataFrame: + def get_result(self, is_copied: bool = True) -> DataFrame: if self.indicator: self.left, self.right = self._indicator_pre_merge(self.left, self.right) join_index, left_indexer, right_indexer = self._get_join_info() result = self._reindex_and_concat( - join_index, left_indexer, right_indexer, copy=copy + join_index, left_indexer, right_indexer, is_copied=is_copied ) result = result.__finalize__(self, method=self._merge_type) @@ -1776,7 +1776,7 @@ def __init__( sort=True, # factorize sorts ) - def get_result(self, copy: bool = True) -> DataFrame: + def get_result(self, is_copied: bool = True) -> DataFrame: join_index, left_indexer, right_indexer = self._get_join_info() llabels, rlabels = _items_overlap_with_suffix( @@ -1799,7 +1799,7 @@ def get_result(self, copy: bool = True) -> DataFrame: right_join_indexer = right_indexer result = self._reindex_and_concat( - join_index, left_join_indexer, right_join_indexer, copy=copy + join_index, left_join_indexer, right_join_indexer, is_copied=is_copied ) self._maybe_add_join_keys(result, left_indexer, right_indexer) @@ -1845,7 +1845,7 @@ def __init__( right_by=None, axis: AxisInt = 1, suffixes: Suffixes = ("_x", "_y"), - copy: bool = True, + is_copied: bool = True, fill_method: str | None = None, how: str = "asof", tolerance=None, @@ -2156,7 +2156,7 @@ def _get_multiindex_indexer( if sort: rcodes = list(map(np.take, rcodes, index.codes)) else: - i8copy = lambda a: a.astype("i8", subok=False, copy=True) + i8copy = lambda a: a.astype("i8", subok=False, is_copied=True) rcodes = list(map(i8copy, index.codes)) # fix right labels if there were any nulls @@ -2420,8 +2420,8 @@ def _get_join_keys( # get keys for the first `nlev` levels stride = np.prod(shape[1:nlev], dtype="i8") - lkey = stride * llab[0].astype("i8", subok=False, copy=False) - rkey = stride * rlab[0].astype("i8", subok=False, copy=False) + lkey = stride * llab[0].astype("i8", subok=False, is_copied=False) + rkey = stride * rlab[0].astype("i8", subok=False, is_copied=False) for i in range(1, nlev): with np.errstate(divide="ignore"): From 49ddde22880e5780c78822af700173127e3a6b13 Mon Sep 17 00:00:00 2001 From: bang128 <71242233+bang128@users.noreply.github.com> Date: Tue, 22 Nov 2022 18:38:48 -0800 Subject: [PATCH 3/3] #49656: update concat.py and merge.py --- pandas/core/internals/concat.py | 24 +++++++++++----------- pandas/core/reshape/merge.py | 36 ++++++++++++++++----------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 349b7fc94d34d..d1a252f727e90 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -1,6 +1,6 @@ from __future__ import annotations -import copy +import copy as cp import itertools from typing import ( TYPE_CHECKING, @@ -69,7 +69,7 @@ def _concatenate_array_managers( - mgrs_indexers, axes: list[Index], concat_axis: AxisInt, is_copied: bool + mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy: bool ) -> Manager: """ Concatenate array managers into one. @@ -95,7 +95,7 @@ def _concatenate_array_managers( ) if ax == 1 and indexer is not None: axis1_made_copy = True - if is_copied and concat_axis == 0 and not axis1_made_copy: + if copy and concat_axis == 0 and not axis1_made_copy: # for concat_axis 1 we will always get a copy through concat_arrays mgr = mgr.copy() mgrs.append(mgr) @@ -151,7 +151,7 @@ def concat_arrays(to_concat: list) -> ArrayLike: to_concat = [ arr.to_array(target_dtype) if isinstance(arr, NullArrayProxy) - else astype_array(arr, target_dtype, is_copied=False) + else astype_array(arr, target_dtype, copy=False) for arr in to_concat ] @@ -173,7 +173,7 @@ def concat_arrays(to_concat: list) -> ArrayLike: def concatenate_managers( - mgrs_indexers, axes: list[Index], concat_axis: AxisInt, is_copied: bool + mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy: bool ) -> Manager: """ Concatenate block managers into one. @@ -191,7 +191,7 @@ def concatenate_managers( """ # TODO(ArrayManager) this assumes that all managers are of the same type if isinstance(mgrs_indexers[0][0], ArrayManager): - return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, is_copied) + return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy) mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers) @@ -207,7 +207,7 @@ def concatenate_managers( if len(join_units) == 1 and not join_units[0].indexers: values = blk.values - if is_copied: + if copy: values = values.copy() else: values = values.view() @@ -229,7 +229,7 @@ def concatenate_managers( fastpath = blk.values.dtype == values.dtype else: - values = _concatenate_join_units(join_units, concat_axis, is_copied=is_copied) + values = _concatenate_join_units(join_units, concat_axis, copy=copy) fastpath = False if fastpath: @@ -260,7 +260,7 @@ def _maybe_reindex_columns_na_proxy( axes[0], indexers[0], axis=0, - is_copied=False, + copy=False, only_slice=True, allow_dups=True, use_na_proxy=True, @@ -524,7 +524,7 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: def _concatenate_join_units( - join_units: list[JoinUnit], concat_axis: AxisInt, is_copied: bool + join_units: list[JoinUnit], concat_axis: AxisInt, copy: bool ) -> ArrayLike: """ Concatenate values from several join units along selected axis. @@ -546,7 +546,7 @@ def _concatenate_join_units( if len(to_concat) == 1: # Only one block, nothing to concatenate. concat_values = to_concat[0] - if is_copied: + if copy: if isinstance(concat_values, np.ndarray): # non-reindexed (=not yet copied) arrays are made into a view # in JoinUnit.get_reindexed_values @@ -691,7 +691,7 @@ def _trim_join_unit(join_unit: JoinUnit, length: int) -> JoinUnit: else: extra_block = join_unit.block - extra_indexers = copy.copy(join_unit.indexers) + extra_indexers = cp.copy(join_unit.indexers) extra_indexers[0] = extra_indexers[0][length:] join_unit.indexers[0] = join_unit.indexers[0][:length] diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 74f5c8c3a3425..9e73dcb789075 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -3,7 +3,7 @@ """ from __future__ import annotations -import copy +import copy as cp import datetime from functools import partial import string @@ -106,7 +106,7 @@ def merge( right_index: bool = False, sort: bool = False, suffixes: Suffixes = ("_x", "_y"), - is_copied: bool = True, + copy: bool = True, indicator: str | bool = False, validate: str | None = None, ) -> DataFrame: @@ -124,7 +124,7 @@ def merge( indicator=indicator, validate=validate, ) - return op.get_result(is_copied=is_copied) + return op.get_result(copy=copy) if __debug__: @@ -183,7 +183,7 @@ def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces): from pandas.core.reshape.concat import concat result = concat(pieces, ignore_index=True) - result = result.reindex(columns=pieces[0].columns, is_copied=False) + result = result.reindex(columns=pieces[0].columns, copy=False) return result, lby @@ -624,7 +624,7 @@ class _MergeOperation: bm_axis: AxisInt sort: bool suffixes: Suffixes - is_copied: bool + copy: bool indicator: str | bool validate: str | None join_names: list[Hashable] @@ -721,7 +721,7 @@ def _reindex_and_concat( join_index: Index, left_indexer: npt.NDArray[np.intp] | None, right_indexer: npt.NDArray[np.intp] | None, - is_copied: bool, + copy: bool, ) -> DataFrame: """ reindex along index and concat along columns. @@ -742,7 +742,7 @@ def _reindex_and_concat( join_index, left_indexer, axis=1, - is_copied=False, + copy=False, only_slice=True, allow_dups=True, use_na_proxy=True, @@ -755,7 +755,7 @@ def _reindex_and_concat( join_index, right_indexer, axis=1, - is_copied=False, + copy=False, only_slice=True, allow_dups=True, use_na_proxy=True, @@ -767,17 +767,17 @@ def _reindex_and_concat( left.columns = llabels right.columns = rlabels - result = concat([left, right], axis=1, is_copied=is_copied) + result = concat([left, right], axis=1, copy=copy) return result - def get_result(self, is_copied: bool = True) -> DataFrame: + def get_result(self, copy: bool = True) -> DataFrame: if self.indicator: self.left, self.right = self._indicator_pre_merge(self.left, self.right) join_index, left_indexer, right_indexer = self._get_join_info() result = self._reindex_and_concat( - join_index, left_indexer, right_indexer, is_copied=is_copied + join_index, left_indexer, right_indexer, copy=copy ) result = result.__finalize__(self, method=self._merge_type) @@ -1637,7 +1637,7 @@ def get_join_indexers( lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort, how=how) # preserve left frame order if how == 'left' and sort == False - kwargs = copy.copy(kwargs) + kwargs = cp.copy(kwargs) if how in ("left", "right"): kwargs["sort"] = sort join_func = { @@ -1776,7 +1776,7 @@ def __init__( sort=True, # factorize sorts ) - def get_result(self, is_copied: bool = True) -> DataFrame: + def get_result(self, copy: bool = True) -> DataFrame: join_index, left_indexer, right_indexer = self._get_join_info() llabels, rlabels = _items_overlap_with_suffix( @@ -1799,7 +1799,7 @@ def get_result(self, is_copied: bool = True) -> DataFrame: right_join_indexer = right_indexer result = self._reindex_and_concat( - join_index, left_join_indexer, right_join_indexer, is_copied=is_copied + join_index, left_join_indexer, right_join_indexer, copy=copy ) self._maybe_add_join_keys(result, left_indexer, right_indexer) @@ -1845,7 +1845,7 @@ def __init__( right_by=None, axis: AxisInt = 1, suffixes: Suffixes = ("_x", "_y"), - is_copied: bool = True, + copy: bool = True, fill_method: str | None = None, how: str = "asof", tolerance=None, @@ -2156,7 +2156,7 @@ def _get_multiindex_indexer( if sort: rcodes = list(map(np.take, rcodes, index.codes)) else: - i8copy = lambda a: a.astype("i8", subok=False, is_copied=True) + i8copy = lambda a: a.astype("i8", subok=False, copy=True) rcodes = list(map(i8copy, index.codes)) # fix right labels if there were any nulls @@ -2420,8 +2420,8 @@ def _get_join_keys( # get keys for the first `nlev` levels stride = np.prod(shape[1:nlev], dtype="i8") - lkey = stride * llab[0].astype("i8", subok=False, is_copied=False) - rkey = stride * rlab[0].astype("i8", subok=False, is_copied=False) + lkey = stride * llab[0].astype("i8", subok=False, copy=False) + rkey = stride * rlab[0].astype("i8", subok=False, copy=False) for i in range(1, nlev): with np.errstate(divide="ignore"):