diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 18fd1057c9b65..5e7c75282bf84 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -440,6 +440,7 @@ Reshaping ^^^^^^^^^ - Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`) - Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`) +- Bug in :meth:`DataFrame.align` when aligning a :class:`MultiIndex` to a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`) - Sparse diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3a8081a234a3b..86e5b372994dc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8975,10 +8975,14 @@ def _align_series( is_series = isinstance(self, ABCSeries) + if (not is_series and axis is None) or axis not in [None, 0, 1]: + raise ValueError("Must specify axis=0 or 1") + + if is_series and axis == 1: + raise ValueError("cannot align series to a series other than axis 0") + # series/series compat, other must always be a Series - if is_series: - if axis: - raise ValueError("cannot align series to a series other than axis 0") + if not axis: # equal if self.index.equals(other.index): @@ -8988,26 +8992,38 @@ def _align_series( other.index, how=join, level=level, return_indexers=True ) - left = self._reindex_indexer(join_index, lidx, copy) + if is_series: + left = self._reindex_indexer(join_index, lidx, copy) + elif lidx is None: + left = self.copy() if copy else self + else: + data = algos.take_nd( + self.values, + lidx, + allow_fill=True, + fill_value=None, + ) + + left = self._constructor( + data=data, columns=self.columns, index=join_index + ) + right = other._reindex_indexer(join_index, ridx, copy) else: + # one has > 1 ndim fdata = self._mgr - if axis in [0, 1]: - join_index = self.axes[axis] - lidx, ridx = None, None - if not join_index.equals(other.index): - join_index, lidx, ridx = join_index.join( - other.index, how=join, level=level, return_indexers=True - ) - - if lidx is not None: - bm_axis = self._get_block_manager_axis(axis) - fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis) + join_index = self.axes[1] + lidx, ridx = None, None + if not join_index.equals(other.index): + join_index, lidx, ridx = join_index.join( + other.index, how=join, level=level, return_indexers=True + ) - else: - raise ValueError("Must specify axis=0 or 1") + if lidx is not None: + bm_axis = self._get_block_manager_axis(1) + fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis) if copy and fdata is self._mgr: fdata = fdata.copy() diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index 03ea6492c07f3..575db40f171a2 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -243,6 +243,105 @@ def test_align_series_combinations(self): tm.assert_series_equal(res1, exp2) tm.assert_frame_equal(res2, exp1) + def test_multiindex_align_to_series_with_common_index_level(self): + # GH-46001 + foo_index = Index([1, 2, 3], name="foo") + bar_index = Index([1, 2], name="bar") + + series = Series([1, 2], index=bar_index, name="foo_series") + df = DataFrame( + {"col": np.arange(6)}, + index=pd.MultiIndex.from_product([foo_index, bar_index]), + ) + + expected_r = Series([1, 2] * 3, index=df.index, name="foo_series") + result_l, result_r = df.align(series, axis=0) + + tm.assert_frame_equal(result_l, df) + tm.assert_series_equal(result_r, expected_r) + + def test_multiindex_align_to_series_with_common_index_level_missing_in_left(self): + # GH-46001 + foo_index = Index([1, 2, 3], name="foo") + bar_index = Index([1, 2], name="bar") + + series = Series( + [1, 2, 3, 4], index=Index([1, 2, 3, 4], name="bar"), name="foo_series" + ) + df = DataFrame( + {"col": np.arange(6)}, + index=pd.MultiIndex.from_product([foo_index, bar_index]), + ) + + expected_r = Series([1, 2] * 3, index=df.index, name="foo_series") + result_l, result_r = df.align(series, axis=0) + + tm.assert_frame_equal(result_l, df) + tm.assert_series_equal(result_r, expected_r) + + def test_multiindex_align_to_series_with_common_index_level_missing_in_right(self): + # GH-46001 + foo_index = Index([1, 2, 3], name="foo") + bar_index = Index([1, 2, 3, 4], name="bar") + + series = Series([1, 2], index=Index([1, 2], name="bar"), name="foo_series") + df = DataFrame( + {"col": np.arange(12)}, + index=pd.MultiIndex.from_product([foo_index, bar_index]), + ) + + expected_r = Series( + [1, 2, np.nan, np.nan] * 3, index=df.index, name="foo_series" + ) + result_l, result_r = df.align(series, axis=0) + + tm.assert_frame_equal(result_l, df) + tm.assert_series_equal(result_r, expected_r) + + def test_multiindex_align_to_series_with_common_index_level_missing_in_both(self): + # GH-46001 + foo_index = Index([1, 2, 3], name="foo") + bar_index = Index([1, 3, 4], name="bar") + + series = Series( + [1, 2, 3], index=Index([1, 2, 4], name="bar"), name="foo_series" + ) + df = DataFrame( + {"col": np.arange(9)}, + index=pd.MultiIndex.from_product([foo_index, bar_index]), + ) + + expected_r = Series([1, np.nan, 3] * 3, index=df.index, name="foo_series") + result_l, result_r = df.align(series, axis=0) + + tm.assert_frame_equal(result_l, df) + tm.assert_series_equal(result_r, expected_r) + + def test_multiindex_align_to_series_with_common_index_level_non_unique_cols(self): + # GH-46001 + foo_index = Index([1, 2, 3], name="foo") + bar_index = Index([1, 2], name="bar") + + series = Series([1, 2], index=bar_index, name="foo_series") + df = DataFrame( + np.arange(18).reshape(6, 3), + index=pd.MultiIndex.from_product([foo_index, bar_index]), + ) + df.columns = ["cfoo", "cbar", "cfoo"] + + expected = Series([1, 2] * 3, index=df.index, name="foo_series") + result_left, result_right = df.align(series, axis=0) + + tm.assert_series_equal(result_right, expected) + tm.assert_index_equal(result_left.columns, df.columns) + + def test_missing_axis_specification_exception(self): + df = DataFrame(np.arange(50).reshape((10, 5))) + series = Series(np.arange(5)) + + with pytest.raises(ValueError, match=r"axis=0 or 1"): + df.align(series) + def _check_align(self, a, b, axis, fill_axis, how, method, limit=None): aa, ab = a.align( b, axis=axis, join=how, method=method, limit=limit, fill_axis=fill_axis diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 9ab14e2a55662..da5e7d15ab9d2 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -722,6 +722,116 @@ def test_broadcast_multiindex(self, level): tm.assert_frame_equal(result, expected) + def test_frame_multiindex_operations(self): + # GH 43321 + df = DataFrame( + {2010: [1, 2, 3], 2020: [3, 4, 5]}, + index=MultiIndex.from_product( + [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"] + ), + ) + + series = Series( + [0.4], + index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]), + ) + + expected = DataFrame( + {2010: [1.4, 2.4, 3.4], 2020: [3.4, 4.4, 5.4]}, + index=MultiIndex.from_product( + [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"] + ), + ) + result = df.add(series, axis=0) + + tm.assert_frame_equal(result, expected) + + def test_frame_multiindex_operations_series_index_to_frame_index(self): + # GH 43321 + df = DataFrame( + {2010: [1], 2020: [3]}, + index=MultiIndex.from_product([["a"], ["b"]], names=["scen", "mod"]), + ) + + series = Series( + [10.0, 20.0, 30.0], + index=MultiIndex.from_product( + [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"] + ), + ) + + expected = DataFrame( + {2010: [11.0, 21, 31.0], 2020: [13.0, 23.0, 33.0]}, + index=MultiIndex.from_product( + [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"] + ), + ) + result = df.add(series, axis=0) + + tm.assert_frame_equal(result, expected) + + def test_frame_multiindex_operations_no_align(self): + df = DataFrame( + {2010: [1, 2, 3], 2020: [3, 4, 5]}, + index=MultiIndex.from_product( + [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"] + ), + ) + + series = Series( + [0.4], + index=MultiIndex.from_product([["c"], ["a"]], names=["mod", "scen"]), + ) + + expected = DataFrame( + {2010: np.nan, 2020: np.nan}, + index=MultiIndex.from_tuples( + [ + ("a", "b", 0), + ("a", "b", 1), + ("a", "b", 2), + ("a", "c", np.nan), + ], + names=["scen", "mod", "id"], + ), + ) + result = df.add(series, axis=0) + + tm.assert_frame_equal(result, expected) + + def test_frame_multiindex_operations_part_align(self): + df = DataFrame( + {2010: [1, 2, 3], 2020: [3, 4, 5]}, + index=MultiIndex.from_tuples( + [ + ("a", "b", 0), + ("a", "b", 1), + ("a", "c", 2), + ], + names=["scen", "mod", "id"], + ), + ) + + series = Series( + [0.4], + index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]), + ) + + expected = DataFrame( + {2010: [1.4, 2.4, np.nan], 2020: [3.4, 4.4, np.nan]}, + index=MultiIndex.from_tuples( + [ + ("a", "b", 0), + ("a", "b", 1), + ("a", "c", 2), + ], + names=["scen", "mod", "id"], + ), + ) + result = df.add(series, axis=0) + + tm.assert_frame_equal(result, expected) + class TestFrameArithmetic: def test_td64_op_nat_casting(self):