From 8440ddef16cb08f76d04c0ca21d057a67a21fbbe Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Fri, 21 Mar 2025 21:06:33 +0000 Subject: [PATCH 01/30] test case for subplot stacking --- pandas/tests/plotting/test_common.py | 33 +++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/pandas/tests/plotting/test_common.py b/pandas/tests/plotting/test_common.py index 20daf59356248..c4d74fd30a9c8 100644 --- a/pandas/tests/plotting/test_common.py +++ b/pandas/tests/plotting/test_common.py @@ -1,6 +1,7 @@ import pytest - +import numpy as np from pandas import DataFrame +from pandas import unique from pandas.tests.plotting.common import ( _check_plot_works, _check_ticks_props, @@ -58,3 +59,33 @@ def test_colorbar_layout(self): fig.colorbar(cs0, ax=[axes["A"], axes["B"]], location="right") DataFrame(x).plot(ax=axes["C"]) + + def test_bar_subplot_stacking(self): + #GH Issue 61018 + #Extracts height and location data + test_data = np.random.default_rng(3).integers(0,100,5) + df = DataFrame({"a": test_data, "b": test_data[::-1]}) + ax = _check_plot_works(df.plot, subplots= [('a','b')], kind="bar", stacked=True) + + #get xy and height of squares that represent the data graphed from the df + #we would expect the height value of A to be reflected in the Y coord of B + data_from_plot_mat = [(x.get_x(), x.get_y(), x.get_height()) for x in ax[0].findobj(plt.Rectangle) if x.get_height() in test_data] + data_from_plot_df = DataFrame(data = data_from_plot_mat, columns = ["x_coord", "y_coord", "height"]) + unique_x_loc = unique(data_from_plot_df["x_coord"]) + + plot_a_df = data_from_plot_df.iloc[:len(test_data)] + plot_b_df = data_from_plot_df.iloc[len(test_data):].reset_index() + total_bar_height = plot_a_df["height"].add(plot_b_df["height"]) + + print(test_data + test_data[::-1]) + + #check number of bars matches the number of data plotted + assert len(unique_x_loc) == len(test_data) + + #checks that the first set of bars are the correct height and that the second one starts at the top of the first, additional checks the combined height of the bars are correct + assert (plot_a_df["height"] == test_data).all() + assert (plot_b_df["y_coord"] == test_data).all() + assert (total_bar_height == test_data + test_data[::-1]).all() + + + From 350a7ecbbdb423ecddf3bcff02a33732328b2412 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Fri, 21 Mar 2025 22:19:19 +0000 Subject: [PATCH 02/30] Removed overlooked print statement --- pandas/tests/plotting/test_common.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/plotting/test_common.py b/pandas/tests/plotting/test_common.py index c4d74fd30a9c8..372adba9bce8f 100644 --- a/pandas/tests/plotting/test_common.py +++ b/pandas/tests/plotting/test_common.py @@ -77,8 +77,6 @@ def test_bar_subplot_stacking(self): plot_b_df = data_from_plot_df.iloc[len(test_data):].reset_index() total_bar_height = plot_a_df["height"].add(plot_b_df["height"]) - print(test_data + test_data[::-1]) - #check number of bars matches the number of data plotted assert len(unique_x_loc) == len(test_data) From b27d1acd88531a5f08340f69f1a80dcc00c2a19c Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Sun, 30 Mar 2025 03:20:18 -0500 Subject: [PATCH 03/30] Updated test to check other subplot in figure --- pandas/tests/plotting/test_common.py | 47 ++++++++++++++++++---------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/pandas/tests/plotting/test_common.py b/pandas/tests/plotting/test_common.py index 372adba9bce8f..ecbb7420483f8 100644 --- a/pandas/tests/plotting/test_common.py +++ b/pandas/tests/plotting/test_common.py @@ -62,28 +62,43 @@ def test_colorbar_layout(self): def test_bar_subplot_stacking(self): #GH Issue 61018 - #Extracts height and location data test_data = np.random.default_rng(3).integers(0,100,5) - df = DataFrame({"a": test_data, "b": test_data[::-1]}) - ax = _check_plot_works(df.plot, subplots= [('a','b')], kind="bar", stacked=True) + df = DataFrame({"A": test_data, "B": test_data[::-1], "C": test_data[0]}) + ax = df.plot(subplots= [('A','B')], kind="bar", stacked=True) - #get xy and height of squares that represent the data graphed from the df - #we would expect the height value of A to be reflected in the Y coord of B - data_from_plot_mat = [(x.get_x(), x.get_y(), x.get_height()) for x in ax[0].findobj(plt.Rectangle) if x.get_height() in test_data] - data_from_plot_df = DataFrame(data = data_from_plot_mat, columns = ["x_coord", "y_coord", "height"]) - unique_x_loc = unique(data_from_plot_df["x_coord"]) - - plot_a_df = data_from_plot_df.iloc[:len(test_data)] - plot_b_df = data_from_plot_df.iloc[len(test_data):].reset_index() - total_bar_height = plot_a_df["height"].add(plot_b_df["height"]) + #finds all the rectangles that represent the values from both subplots + data_from_subplots = [[(x.get_x(), x.get_y(), x.get_height()) for x in ax[i].findobj(plt.Rectangle) if x.get_height() in test_data] for i in range(0,2)] + #get xy and height of squares that represent the data graphed from the df + #we would expect the height value of A to be reflected in the Y coord of B in subplot 1 + subplot_data_df_list = [] + unique_x_loc_list = [] + for i in range(0,len(data_from_subplots)): + subplot_data_df= DataFrame(data = data_from_subplots[i], columns = ["x_coord", "y_coord", "height"]) + unique_x_loc = unique(subplot_data_df["x_coord"]) + + subplot_data_df_list.append(subplot_data_df) + unique_x_loc_list.append(unique_x_loc) + + #Checks subplot 1 + plot_A_df = subplot_data_df_list[0].iloc[:len(test_data)] + plot_B_df = subplot_data_df_list[0].iloc[len(test_data):].reset_index() + total_bar_height = plot_A_df["height"].add(plot_B_df["height"]) #check number of bars matches the number of data plotted - assert len(unique_x_loc) == len(test_data) - + assert len(unique_x_loc_list[0]) == len(test_data) #checks that the first set of bars are the correct height and that the second one starts at the top of the first, additional checks the combined height of the bars are correct - assert (plot_a_df["height"] == test_data).all() - assert (plot_b_df["y_coord"] == test_data).all() + assert (plot_A_df["height"] == test_data).all() + assert (plot_B_df["y_coord"] == test_data).all() assert (total_bar_height == test_data + test_data[::-1]).all() + #Checks subplot 2 + plot_C_df = subplot_data_df_list[1].iloc[:len(test_data)] + #check number of bars matches the number of data plotted + assert len(unique_x_loc_list[1]) == len(test_data) + #checks that all the bars start at zero and are the correct height + assert (plot_C_df["height"] == test_data[0]).all() + assert (plot_C_df["y_coord"] == 0).all() + + From 1fe342af043b76c674adb73d84fe946f30133d83 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Tue, 8 Apr 2025 00:44:25 +0000 Subject: [PATCH 04/30] Updated test cases to include more subplot stacking possibilities --- pandas/tests/plotting/test_common.py | 40 -------------- pandas/tests/plotting/test_misc.py | 81 ++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 40 deletions(-) diff --git a/pandas/tests/plotting/test_common.py b/pandas/tests/plotting/test_common.py index ecbb7420483f8..ee7b57ee62b3b 100644 --- a/pandas/tests/plotting/test_common.py +++ b/pandas/tests/plotting/test_common.py @@ -1,7 +1,6 @@ import pytest import numpy as np from pandas import DataFrame -from pandas import unique from pandas.tests.plotting.common import ( _check_plot_works, _check_ticks_props, @@ -59,45 +58,6 @@ def test_colorbar_layout(self): fig.colorbar(cs0, ax=[axes["A"], axes["B"]], location="right") DataFrame(x).plot(ax=axes["C"]) - - def test_bar_subplot_stacking(self): - #GH Issue 61018 - test_data = np.random.default_rng(3).integers(0,100,5) - df = DataFrame({"A": test_data, "B": test_data[::-1], "C": test_data[0]}) - ax = df.plot(subplots= [('A','B')], kind="bar", stacked=True) - - #finds all the rectangles that represent the values from both subplots - data_from_subplots = [[(x.get_x(), x.get_y(), x.get_height()) for x in ax[i].findobj(plt.Rectangle) if x.get_height() in test_data] for i in range(0,2)] - - #get xy and height of squares that represent the data graphed from the df - #we would expect the height value of A to be reflected in the Y coord of B in subplot 1 - subplot_data_df_list = [] - unique_x_loc_list = [] - for i in range(0,len(data_from_subplots)): - subplot_data_df= DataFrame(data = data_from_subplots[i], columns = ["x_coord", "y_coord", "height"]) - unique_x_loc = unique(subplot_data_df["x_coord"]) - - subplot_data_df_list.append(subplot_data_df) - unique_x_loc_list.append(unique_x_loc) - - #Checks subplot 1 - plot_A_df = subplot_data_df_list[0].iloc[:len(test_data)] - plot_B_df = subplot_data_df_list[0].iloc[len(test_data):].reset_index() - total_bar_height = plot_A_df["height"].add(plot_B_df["height"]) - #check number of bars matches the number of data plotted - assert len(unique_x_loc_list[0]) == len(test_data) - #checks that the first set of bars are the correct height and that the second one starts at the top of the first, additional checks the combined height of the bars are correct - assert (plot_A_df["height"] == test_data).all() - assert (plot_B_df["y_coord"] == test_data).all() - assert (total_bar_height == test_data + test_data[::-1]).all() - - #Checks subplot 2 - plot_C_df = subplot_data_df_list[1].iloc[:len(test_data)] - #check number of bars matches the number of data plotted - assert len(unique_x_loc_list[1]) == len(test_data) - #checks that all the bars start at zero and are the correct height - assert (plot_C_df["height"] == test_data[0]).all() - assert (plot_C_df["y_coord"] == 0).all() diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 43e1255404784..d878c9200c835 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -681,3 +681,84 @@ def test_bar_plt_xaxis_intervalrange(self): (a.get_text() == b.get_text()) for a, b in zip(s.plot.bar().get_xticklabels(), expected) ) + +@pytest.fixture(scope="class") +def BSS_data() -> np.array: + yield np.random.default_rng(3).integers(0,100,5) + +@pytest.fixture(scope="class") +def BSS_df(BSS_data) -> DataFrame: + BSS_df = DataFrame({"A": BSS_data, "B": BSS_data[::-1], "C": BSS_data[0], "D": BSS_data[-1]}) + return BSS_df + +def _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division): + subplot_data_df_list = [] + + # get xy and height of squares that represent the data graphed from the df, seperated by subplots + for i in range(len(subplot_division)): + subplot_data = np.array([(x.get_x(), x.get_y(), x.get_height()) for x in ax[i].findobj(plt.Rectangle) if x.get_height() in BSS_data]) + subplot_data_df_list.append(DataFrame(data = subplot_data, columns = ["x_coord", "y_coord", "height"])) + + return subplot_data_df_list + +def _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df, subplot_columns): + assert_flag = 0 + subplot_sliced_by_source = [subplot_data_df.iloc[len(BSS_data) * i : len(BSS_data) * (i+1)].reset_index() for i in range(0, len(subplot_columns))] + expected_total_height = BSS_df.loc[:,subplot_columns].sum(axis=1) + + for i in range(len(subplot_columns)): + sliced_df = subplot_sliced_by_source[i] + if i == 0: + #Checks that the bar chart starts y=0 + assert (sliced_df["y_coord"] == 0).all + height_iter = sliced_df["y_coord"].add(sliced_df["height"]) + else: + height_iter = height_iter + sliced_df["height"] + + if i+1 == len(subplot_columns): + #Checks final height matches what is expected + tm.assert_series_equal(height_iter, expected_total_height, check_names = False, check_dtype= False) + + else: + #Checks each preceding bar ends where the next one starts + next_start_coord = subplot_sliced_by_source[i+1]["y_coord"] + tm.assert_series_equal(height_iter, next_start_coord, check_names = False, check_dtype= False) + +class TestBarSubplotStacked: + #GH Issue 61018 + def test_bar_1_subplot_1_double_stacked(self, BSS_data, BSS_df): + columns_used = ["A", "B"] + BSS_df_trimmed = BSS_df[columns_used] + subplot_division = [columns_used] + ax = BSS_df_trimmed.plot(subplots = subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) + for i in range(len(subplot_data_df_list)): + _BSS_subplot_checker(BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]) + plt.savefig("1s1d.png") + + + def test_bar_2_subplot_1_double_stacked(self, BSS_data, BSS_df): + columns_used = ["A", "B", "C"] + BSS_df_trimmed = BSS_df[columns_used] + subplot_division = [(columns_used[0], columns_used[1]), (columns_used[2],)] + ax = BSS_df_trimmed.plot(subplots = subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) + for i in range(len(subplot_data_df_list)): + _BSS_subplot_checker(BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]) + plt.savefig("2s1d.png") + + def test_bar_2_subplot_2_double_stacked(self, BSS_data, BSS_df): + subplot_division = [('A', 'D'), ('C', 'B')] + ax = BSS_df.plot(subplots = subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) + for i in range(len(subplot_data_df_list)): + _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]) + plt.savefig("2s2d.png") + + def test_bar_2_subplots_1_triple_stacked(self, BSS_data, BSS_df): + subplot_division = [('A', 'D', 'C')] + ax = BSS_df.plot(subplots = subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) + for i in range(len(subplot_data_df_list)): + _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]) + plt.savefig("2s1t.png") \ No newline at end of file From de55789ca419656bd555068882189a6d824a738c Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Tue, 8 Apr 2025 00:53:10 +0000 Subject: [PATCH 05/30] removed savefig() left in test cases --- pandas/tests/plotting/test_misc.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index d878c9200c835..e8dd244bd9e63 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -734,7 +734,6 @@ def test_bar_1_subplot_1_double_stacked(self, BSS_data, BSS_df): subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) for i in range(len(subplot_data_df_list)): _BSS_subplot_checker(BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]) - plt.savefig("1s1d.png") def test_bar_2_subplot_1_double_stacked(self, BSS_data, BSS_df): @@ -745,7 +744,6 @@ def test_bar_2_subplot_1_double_stacked(self, BSS_data, BSS_df): subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) for i in range(len(subplot_data_df_list)): _BSS_subplot_checker(BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]) - plt.savefig("2s1d.png") def test_bar_2_subplot_2_double_stacked(self, BSS_data, BSS_df): subplot_division = [('A', 'D'), ('C', 'B')] @@ -753,12 +751,10 @@ def test_bar_2_subplot_2_double_stacked(self, BSS_data, BSS_df): subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) for i in range(len(subplot_data_df_list)): _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]) - plt.savefig("2s2d.png") def test_bar_2_subplots_1_triple_stacked(self, BSS_data, BSS_df): subplot_division = [('A', 'D', 'C')] ax = BSS_df.plot(subplots = subplot_division, kind="bar", stacked=True) subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) for i in range(len(subplot_data_df_list)): - _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]) - plt.savefig("2s1t.png") \ No newline at end of file + _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]) \ No newline at end of file From e8a6f91f992aa2e38fb1ef8af4cfff26d56eb035 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Tue, 22 Apr 2025 22:47:22 +0000 Subject: [PATCH 06/30] Updated test cases to test more arrangements --- pandas/tests/plotting/test_misc.py | 36 +++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index e8dd244bd9e63..10400870ecb80 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -726,8 +726,11 @@ def _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df, subplot_columns): class TestBarSubplotStacked: #GH Issue 61018 - def test_bar_1_subplot_1_double_stacked(self, BSS_data, BSS_df): - columns_used = ["A", "B"] + @pytest.mark.parametrize("columns_used",[["A", "B"], + ["C", "D"], + ["D", "A"] + ]) + def test_bar_1_subplot_1_double_stacked(self, BSS_data, BSS_df, columns_used): BSS_df_trimmed = BSS_df[columns_used] subplot_division = [columns_used] ax = BSS_df_trimmed.plot(subplots = subplot_division, kind="bar", stacked=True) @@ -735,25 +738,36 @@ def test_bar_1_subplot_1_double_stacked(self, BSS_data, BSS_df): for i in range(len(subplot_data_df_list)): _BSS_subplot_checker(BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]) - - def test_bar_2_subplot_1_double_stacked(self, BSS_data, BSS_df): - columns_used = ["A", "B", "C"] - BSS_df_trimmed = BSS_df[columns_used] + @pytest.mark.parametrize("columns_used",[["A", "B", "C"], + ["A", "C", "B"], + ["D", "A", "C"] + + ]) + def test_bar_2_subplot_1_double_stacked(self, BSS_data, BSS_df, columns_used): + BSS_df_trimmed = BSS_df[columns_used] subplot_division = [(columns_used[0], columns_used[1]), (columns_used[2],)] ax = BSS_df_trimmed.plot(subplots = subplot_division, kind="bar", stacked=True) subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) for i in range(len(subplot_data_df_list)): _BSS_subplot_checker(BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]) - def test_bar_2_subplot_2_double_stacked(self, BSS_data, BSS_df): - subplot_division = [('A', 'D'), ('C', 'B')] + @pytest.mark.parametrize("subplot_division", [[("A", "B"), ("C", "D")], + [("A", "D"), ("C", "B")], + [("B", "C"), ("D", "A")], + [("B", "D"), ("C", "A")] + ]) + def test_bar_2_subplot_2_double_stacked(self, BSS_data, BSS_df, subplot_division): ax = BSS_df.plot(subplots = subplot_division, kind="bar", stacked=True) subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) for i in range(len(subplot_data_df_list)): _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]) - - def test_bar_2_subplots_1_triple_stacked(self, BSS_data, BSS_df): - subplot_division = [('A', 'D', 'C')] + + @pytest.mark.parametrize("subplot_division", [[("A", "B", "C")], + [("A", "D", "B")], + [("C", "A", "D")], + [("D", "C", "A")] + ]) + def test_bar_2_subplots_1_triple_stacked(self, BSS_data, BSS_df, subplot_division): ax = BSS_df.plot(subplots = subplot_division, kind="bar", stacked=True) subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) for i in range(len(subplot_data_df_list)): From 0d9f5daf1d422f8f0d61a1e3dbe4e7ee3b722ae5 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Tue, 22 Apr 2025 23:58:13 +0000 Subject: [PATCH 07/30] Completed function fix (order of subplot input does not matter, need clarification if it matters) --- pandas/plotting/_matplotlib/core.py | 41 ++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 1035150302d2c..6192b6c611b6e 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1921,6 +1921,19 @@ def _make_plot(self, fig: Figure) -> None: K = self.nseries data = self.data.fillna(0) + + _stacked_subplots_ind_dict = {} + _stacked_subplots_offsets = [] + + if self.subplots != False & self.stacked: + # _stacked_subplots_list = [sorted(x) for x in self.subplots if len(x) > 1] + temp_ss_dict = {x: self.subplots[x] for x in range(len(self.subplots)) if len(self.subplots[x]) > 1} + for k, v in temp_ss_dict.items(): + for x in v: + _stacked_subplots_ind_dict.setdefault(int(x), k) + + _stacked_subplots_offsets.append([0,0]) + for i, (label, y) in enumerate(self._iter_data(data=data)): ax = self._get_ax(i) kwds = self.kwds.copy() @@ -1946,7 +1959,33 @@ def _make_plot(self, fig: Figure) -> None: start = start + self._start_base kwds["align"] = self._align - if self.subplots: + + try: + offset_index = _stacked_subplots_ind_dict[i] + _stacked_subplots_flag = 1 + except: + _stacked_subplots_flag = 0 + + if _stacked_subplots_flag: + mask = y >= 0 + pos_prior, neg_prior = _stacked_subplots_offsets[offset_index] + start = np.where(mask, pos_prior, neg_prior) + self._start_base + w = self.bar_width / 2 + rect = self._plot( + ax, + self.ax_pos + w, + y, + self.bar_width, + start=start, + label=label, + log=self.log, + **kwds, + ) + pos_new = pos_prior + np.where(mask, y, 0) + neg_new = neg_prior + np.where(mask, 0, y) + _stacked_subplots_offsets[offset_index] = [pos_new, neg_new] + + elif self.subplots: w = self.bar_width / 2 rect = self._plot( ax, From b3560288f5b69df1f013a3072775d3b930815ae4 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 23 Apr 2025 00:15:05 +0000 Subject: [PATCH 08/30] appeasing the great pre-commit formatter --- pandas/plotting/_matplotlib/core.py | 28 +++--- pandas/tests/plotting/test_common.py | 6 +- pandas/tests/plotting/test_misc.py | 137 +++++++++++++++++---------- 3 files changed, 100 insertions(+), 71 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 6192b6c611b6e..6462e77c0ffad 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1922,17 +1922,20 @@ def _make_plot(self, fig: Figure) -> None: data = self.data.fillna(0) - _stacked_subplots_ind_dict = {} - _stacked_subplots_offsets = [] + _stacked_subplots_ind_dict = {} + _stacked_subplots_offsets = [] if self.subplots != False & self.stacked: - # _stacked_subplots_list = [sorted(x) for x in self.subplots if len(x) > 1] - temp_ss_dict = {x: self.subplots[x] for x in range(len(self.subplots)) if len(self.subplots[x]) > 1} + temp_ss_dict = { + x: self.subplots[x] + for x in range(len(self.subplots)) + if len(self.subplots[x]) > 1 + } for k, v in temp_ss_dict.items(): for x in v: _stacked_subplots_ind_dict.setdefault(int(x), k) - - _stacked_subplots_offsets.append([0,0]) + + _stacked_subplots_offsets.append([0, 0]) for i, (label, y) in enumerate(self._iter_data(data=data)): ax = self._get_ax(i) @@ -1959,16 +1962,11 @@ def _make_plot(self, fig: Figure) -> None: start = start + self._start_base kwds["align"] = self._align - - try: - offset_index = _stacked_subplots_ind_dict[i] - _stacked_subplots_flag = 1 - except: - _stacked_subplots_flag = 0 - if _stacked_subplots_flag: - mask = y >= 0 + if i in _stacked_subplots_ind_dict: + offset_index = _stacked_subplots_ind_dict[i] pos_prior, neg_prior = _stacked_subplots_offsets[offset_index] + mask = y >= 0 start = np.where(mask, pos_prior, neg_prior) + self._start_base w = self.bar_width / 2 rect = self._plot( @@ -1984,7 +1982,7 @@ def _make_plot(self, fig: Figure) -> None: pos_new = pos_prior + np.where(mask, y, 0) neg_new = neg_prior + np.where(mask, 0, y) _stacked_subplots_offsets[offset_index] = [pos_new, neg_new] - + elif self.subplots: w = self.bar_width / 2 rect = self._plot( diff --git a/pandas/tests/plotting/test_common.py b/pandas/tests/plotting/test_common.py index ee7b57ee62b3b..20daf59356248 100644 --- a/pandas/tests/plotting/test_common.py +++ b/pandas/tests/plotting/test_common.py @@ -1,5 +1,5 @@ import pytest -import numpy as np + from pandas import DataFrame from pandas.tests.plotting.common import ( _check_plot_works, @@ -58,7 +58,3 @@ def test_colorbar_layout(self): fig.colorbar(cs0, ax=[axes["A"], axes["B"]], location="right") DataFrame(x).plot(ax=axes["C"]) - - - - diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 10400870ecb80..4c0c77152f0f7 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -682,93 +682,128 @@ def test_bar_plt_xaxis_intervalrange(self): for a, b in zip(s.plot.bar().get_xticklabels(), expected) ) + @pytest.fixture(scope="class") def BSS_data() -> np.array: - yield np.random.default_rng(3).integers(0,100,5) + return np.random.default_rng(3).integers(0, 100, 5) + @pytest.fixture(scope="class") def BSS_df(BSS_data) -> DataFrame: - BSS_df = DataFrame({"A": BSS_data, "B": BSS_data[::-1], "C": BSS_data[0], "D": BSS_data[-1]}) + BSS_df = DataFrame( + {"A": BSS_data, "B": BSS_data[::-1], "C": BSS_data[0], "D": BSS_data[-1]} + ) return BSS_df + def _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division): subplot_data_df_list = [] - # get xy and height of squares that represent the data graphed from the df, seperated by subplots + # get xy and height of squares representing data, separated by subplots for i in range(len(subplot_division)): - subplot_data = np.array([(x.get_x(), x.get_y(), x.get_height()) for x in ax[i].findobj(plt.Rectangle) if x.get_height() in BSS_data]) - subplot_data_df_list.append(DataFrame(data = subplot_data, columns = ["x_coord", "y_coord", "height"])) + subplot_data = np.array( + [ + (x.get_x(), x.get_y(), x.get_height()) + for x in ax[i].findobj(plt.Rectangle) + if x.get_height() in BSS_data + ] + ) + subplot_data_df_list.append( + DataFrame(data=subplot_data, columns=["x_coord", "y_coord", "height"]) + ) return subplot_data_df_list + def _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df, subplot_columns): - assert_flag = 0 - subplot_sliced_by_source = [subplot_data_df.iloc[len(BSS_data) * i : len(BSS_data) * (i+1)].reset_index() for i in range(0, len(subplot_columns))] - expected_total_height = BSS_df.loc[:,subplot_columns].sum(axis=1) - + subplot_sliced_by_source = [ + subplot_data_df.iloc[len(BSS_data) * i : len(BSS_data) * (i + 1)].reset_index() + for i in range(len(subplot_columns)) + ] + expected_total_height = BSS_df.loc[:, subplot_columns].sum(axis=1) + for i in range(len(subplot_columns)): sliced_df = subplot_sliced_by_source[i] if i == 0: - #Checks that the bar chart starts y=0 + # Checks that the bar chart starts y=0 assert (sliced_df["y_coord"] == 0).all height_iter = sliced_df["y_coord"].add(sliced_df["height"]) else: height_iter = height_iter + sliced_df["height"] - if i+1 == len(subplot_columns): - #Checks final height matches what is expected - tm.assert_series_equal(height_iter, expected_total_height, check_names = False, check_dtype= False) - + if i + 1 == len(subplot_columns): + # Checks final height matches what is expected + tm.assert_series_equal( + height_iter, expected_total_height, check_names=False, check_dtype=False + ) + else: - #Checks each preceding bar ends where the next one starts - next_start_coord = subplot_sliced_by_source[i+1]["y_coord"] - tm.assert_series_equal(height_iter, next_start_coord, check_names = False, check_dtype= False) + # Checks each preceding bar ends where the next one starts + next_start_coord = subplot_sliced_by_source[i + 1]["y_coord"] + tm.assert_series_equal( + height_iter, next_start_coord, check_names=False, check_dtype=False + ) + class TestBarSubplotStacked: - #GH Issue 61018 - @pytest.mark.parametrize("columns_used",[["A", "B"], - ["C", "D"], - ["D", "A"] - ]) + # GH Issue 61018 + @pytest.mark.parametrize("columns_used", [["A", "B"], ["C", "D"], ["D", "A"]]) def test_bar_1_subplot_1_double_stacked(self, BSS_data, BSS_df, columns_used): BSS_df_trimmed = BSS_df[columns_used] subplot_division = [columns_used] - ax = BSS_df_trimmed.plot(subplots = subplot_division, kind="bar", stacked=True) - subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) + ax = BSS_df_trimmed.plot(subplots=subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _BSS_xyheight_from_ax_helper( + BSS_data, ax, subplot_division + ) for i in range(len(subplot_data_df_list)): - _BSS_subplot_checker(BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]) + _BSS_subplot_checker( + BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i] + ) - @pytest.mark.parametrize("columns_used",[["A", "B", "C"], - ["A", "C", "B"], - ["D", "A", "C"] - - ]) + @pytest.mark.parametrize( + "columns_used", [["A", "B", "C"], ["A", "C", "B"], ["D", "A", "C"]] + ) def test_bar_2_subplot_1_double_stacked(self, BSS_data, BSS_df, columns_used): - BSS_df_trimmed = BSS_df[columns_used] + BSS_df_trimmed = BSS_df[columns_used] subplot_division = [(columns_used[0], columns_used[1]), (columns_used[2],)] - ax = BSS_df_trimmed.plot(subplots = subplot_division, kind="bar", stacked=True) - subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) + ax = BSS_df_trimmed.plot(subplots=subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _BSS_xyheight_from_ax_helper( + BSS_data, ax, subplot_division + ) for i in range(len(subplot_data_df_list)): - _BSS_subplot_checker(BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i]) + _BSS_subplot_checker( + BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i] + ) - @pytest.mark.parametrize("subplot_division", [[("A", "B"), ("C", "D")], - [("A", "D"), ("C", "B")], - [("B", "C"), ("D", "A")], - [("B", "D"), ("C", "A")] - ]) + @pytest.mark.parametrize( + "subplot_division", + [ + [("A", "B"), ("C", "D")], + [("A", "D"), ("C", "B")], + [("B", "C"), ("D", "A")], + [("B", "D"), ("C", "A")], + ], + ) def test_bar_2_subplot_2_double_stacked(self, BSS_data, BSS_df, subplot_division): - ax = BSS_df.plot(subplots = subplot_division, kind="bar", stacked=True) - subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) + ax = BSS_df.plot(subplots=subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _BSS_xyheight_from_ax_helper( + BSS_data, ax, subplot_division + ) for i in range(len(subplot_data_df_list)): - _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]) - - @pytest.mark.parametrize("subplot_division", [[("A", "B", "C")], - [("A", "D", "B")], - [("C", "A", "D")], - [("D", "C", "A")] - ]) + _BSS_subplot_checker( + BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i] + ) + + @pytest.mark.parametrize( + "subplot_division", + [[("A", "B", "C")], [("A", "D", "B")], [("C", "A", "D")], [("D", "C", "A")]], + ) def test_bar_2_subplots_1_triple_stacked(self, BSS_data, BSS_df, subplot_division): - ax = BSS_df.plot(subplots = subplot_division, kind="bar", stacked=True) - subplot_data_df_list = _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division) + ax = BSS_df.plot(subplots=subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _BSS_xyheight_from_ax_helper( + BSS_data, ax, subplot_division + ) for i in range(len(subplot_data_df_list)): - _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i]) \ No newline at end of file + _BSS_subplot_checker( + BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i] + ) From 4cc2905dac614a30409a5ed8343d1e3c0d08a0fa Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 23 Apr 2025 00:50:42 +0000 Subject: [PATCH 09/30] Updated whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f76d94036c6d8..e9c63b2999f1d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -795,6 +795,7 @@ Period Plotting ^^^^^^^^ - Bug in :meth:`.DataFrameGroupBy.boxplot` failed when there were multiple groupings (:issue:`14701`) +- Bug in :meth:`DataFrame.plot.bar` when ``subplots`` and ``stacked=True`` are used in conjunction which causes incorrect stacking. (:issue:`61018`) - Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`) - Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when set both color and a ``dict`` style (:issue:`59461`) - Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`) From 99746323168da2dfeb5948ea7a46c595a7ba967f Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 23 Apr 2025 04:49:15 +0000 Subject: [PATCH 10/30] Docstring adjustment --- pandas/plotting/_matplotlib/core.py | 17 ++++++++--------- pandas/tests/plotting/test_misc.py | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 0c0542503b377..f9cdaae50f069 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1929,18 +1929,17 @@ def _make_plot(self, fig: Figure) -> None: data = self.data.fillna(0) - _stacked_subplots_ind_dict = {} + _stacked_subplots_ind = {} _stacked_subplots_offsets = [] if self.subplots != False & self.stacked: - temp_ss_dict = { - x: self.subplots[x] - for x in range(len(self.subplots)) - if len(self.subplots[x]) > 1 + sub_range = range(len(self.subplots)) + ss_temp = { + x: self.subplots[x] for x in sub_range if len(self.subplots[x]) > 1 } - for k, v in temp_ss_dict.items(): + for k, v in ss_temp.items(): for x in v: - _stacked_subplots_ind_dict.setdefault(int(x), k) + _stacked_subplots_ind.setdefault(int(x), k) _stacked_subplots_offsets.append([0, 0]) @@ -1970,8 +1969,8 @@ def _make_plot(self, fig: Figure) -> None: kwds["align"] = self._align - if i in _stacked_subplots_ind_dict: - offset_index = _stacked_subplots_ind_dict[i] + if i in _stacked_subplots_ind: + offset_index = _stacked_subplots_ind[i] pos_prior, neg_prior = _stacked_subplots_offsets[offset_index] mask = y >= 0 start = np.where(mask, pos_prior, neg_prior) + self._start_base diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 4c0c77152f0f7..f97f3a9a2ff95 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -684,7 +684,7 @@ def test_bar_plt_xaxis_intervalrange(self): @pytest.fixture(scope="class") -def BSS_data() -> np.array: +def BSS_data(): return np.random.default_rng(3).integers(0, 100, 5) From ddd331175dfaef3c0f4c4f9bea21952a307e9ed6 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 23 Apr 2025 05:12:42 +0000 Subject: [PATCH 11/30] Moved self.subplot check to a seperate bool --- pandas/plotting/_matplotlib/core.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index f9cdaae50f069..7ebc8948b91b5 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1932,7 +1932,12 @@ def _make_plot(self, fig: Figure) -> None: _stacked_subplots_ind = {} _stacked_subplots_offsets = [] - if self.subplots != False & self.stacked: + if self.subplots: + subplots_status = 1 + else: + subplots_status = 0 + + if subplots_status & self.stacked: sub_range = range(len(self.subplots)) ss_temp = { x: self.subplots[x] for x in sub_range if len(self.subplots[x]) > 1 From df51168837626684ccbf720e8090ac39e0f92fff Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 23 Apr 2025 06:25:54 +0000 Subject: [PATCH 12/30] Added ignore where mypy thinks self.subplots is a bool --- pandas/plotting/_matplotlib/core.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 7ebc8948b91b5..618ecee1fdb09 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1929,18 +1929,16 @@ def _make_plot(self, fig: Figure) -> None: data = self.data.fillna(0) - _stacked_subplots_ind = {} + _stacked_subplots_ind: dict[int, int] = {} _stacked_subplots_offsets = [] - if self.subplots: - subplots_status = 1 - else: - subplots_status = 0 - - if subplots_status & self.stacked: - sub_range = range(len(self.subplots)) + if self.subplots != False & self.stacked: + sub_range = range(len(self.subplots)) # type:ignore[arg-type] ss_temp = { - x: self.subplots[x] for x in sub_range if len(self.subplots[x]) > 1 + # mypy thinks self.subplots is a bool :( + x: self.subplots[x] # type:ignore[index] + for x in sub_range + if len(self.subplots[x]) > 1 # type:ignore[index] } for k, v in ss_temp.items(): for x in v: @@ -1976,7 +1974,7 @@ def _make_plot(self, fig: Figure) -> None: if i in _stacked_subplots_ind: offset_index = _stacked_subplots_ind[i] - pos_prior, neg_prior = _stacked_subplots_offsets[offset_index] + pos_prior, neg_prior = _stacked_subplots_offsets[offset_index] # type:ignore[assignment] mask = y >= 0 start = np.where(mask, pos_prior, neg_prior) + self._start_base w = self.bar_width / 2 From bfb1dd12d6ddc895331e692801dc5d185bcca4a4 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 23 Apr 2025 06:59:08 +0000 Subject: [PATCH 13/30] Actually addressed mypy typing --- pandas/plotting/_matplotlib/core.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 618ecee1fdb09..dc931e26ca660 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1932,13 +1932,16 @@ def _make_plot(self, fig: Figure) -> None: _stacked_subplots_ind: dict[int, int] = {} _stacked_subplots_offsets = [] - if self.subplots != False & self.stacked: - sub_range = range(len(self.subplots)) # type:ignore[arg-type] + self.subplots: list[Any] + if self.subplots: + subplots_flag = 1 + else: + subplots_flag = 0 + + if subplots_flag & self.stacked: + sub_range = range(len(self.subplots)) ss_temp = { - # mypy thinks self.subplots is a bool :( - x: self.subplots[x] # type:ignore[index] - for x in sub_range - if len(self.subplots[x]) > 1 # type:ignore[index] + x: self.subplots[x] for x in sub_range if len(self.subplots[x]) > 1 } for k, v in ss_temp.items(): for x in v: From e4d504028f9075fa087ce3e24a40ddcfc4950772 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Fri, 25 Apr 2025 20:35:03 +0000 Subject: [PATCH 14/30] Incorperated initial PR comments --- pandas/plotting/_matplotlib/core.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index dc931e26ca660..90fee144ae92a 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1939,14 +1939,11 @@ def _make_plot(self, fig: Figure) -> None: subplots_flag = 0 if subplots_flag & self.stacked: - sub_range = range(len(self.subplots)) - ss_temp = { - x: self.subplots[x] for x in sub_range if len(self.subplots[x]) > 1 - } - for k, v in ss_temp.items(): - for x in v: - _stacked_subplots_ind.setdefault(int(x), k) - + for i, sub_plot in enumerate(self.subplots): + if len(sub_plot) <= 1: + continue + for plot in sub_plot: + _stacked_subplots_ind[int(plot)] = i _stacked_subplots_offsets.append([0, 0]) for i, (label, y) in enumerate(self._iter_data(data=data)): From 101ee62f8a462bf0b59a0ad7d7811679af2b37b4 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Fri, 25 Apr 2025 20:38:33 +0000 Subject: [PATCH 15/30] Updated missing () after .all --- pandas/tests/plotting/test_misc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index f97f3a9a2ff95..54f6cdbafe59c 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -726,7 +726,7 @@ def _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df, subplot_columns): sliced_df = subplot_sliced_by_source[i] if i == 0: # Checks that the bar chart starts y=0 - assert (sliced_df["y_coord"] == 0).all + assert (sliced_df["y_coord"] == 0).all() height_iter = sliced_df["y_coord"].add(sliced_df["height"]) else: height_iter = height_iter + sliced_df["height"] From cfa3adaf67c97ae2a0969034204edd783eff5f42 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Fri, 25 Apr 2025 21:18:06 +0000 Subject: [PATCH 16/30] Initial test cases --- pandas/tests/plotting/test_misc.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 43e1255404784..bf91ba9f9ce93 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -681,3 +681,23 @@ def test_bar_plt_xaxis_intervalrange(self): (a.get_text() == b.get_text()) for a, b in zip(s.plot.bar().get_xticklabels(), expected) ) + + +def test_plot_bar_label_count_expected_fail(): + df = DataFrame([(30, 10, 10), (20, 20, 20), (10, 30, 30)], columns=list("ABC")) + with pytest.raises( + ValueError, + match="The length of `title` must equal the number of columns " + "if using `title` of type `list` and `subplots=True`.", + ): + df.plot( + subplots=[("A", "B")], + kind="bar", + stacked=True, + title=["A&B", "C", "Extra Title"], + ) + + +def test_plot_bar_label_count_expected_success(): + df = DataFrame([(30, 10, 10), (20, 20, 20), (10, 30, 30)], columns=list("ABC")) + df.plot(subplots=[("A", "B")], kind="bar", stacked=True, title=["A&B", "C"]) From 2a8f126c5b5f70b065ec952ad4c89dd2f9fba7c3 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Fri, 25 Apr 2025 22:12:56 +0000 Subject: [PATCH 17/30] Addressed more comments on PR --- pandas/plotting/_matplotlib/core.py | 6 +- pandas/tests/plotting/test_misc.py | 143 +++++++++++++++------------- 2 files changed, 77 insertions(+), 72 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 90fee144ae92a..bd5946dc20065 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1933,12 +1933,8 @@ def _make_plot(self, fig: Figure) -> None: _stacked_subplots_offsets = [] self.subplots: list[Any] - if self.subplots: - subplots_flag = 1 - else: - subplots_flag = 0 - if subplots_flag & self.stacked: + if bool(self.subplots) & self.stacked: for i, sub_plot in enumerate(self.subplots): if len(sub_plot) <= 1: continue diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 54f6cdbafe59c..f4a0f1e792ae6 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -683,20 +683,25 @@ def test_bar_plt_xaxis_intervalrange(self): ) -@pytest.fixture(scope="class") -def BSS_data(): +@pytest.fixture +def df_bar_data(): return np.random.default_rng(3).integers(0, 100, 5) -@pytest.fixture(scope="class") -def BSS_df(BSS_data) -> DataFrame: - BSS_df = DataFrame( - {"A": BSS_data, "B": BSS_data[::-1], "C": BSS_data[0], "D": BSS_data[-1]} +@pytest.fixture +def df_bar_df(df_bar_data) -> DataFrame: + df_bar_df = DataFrame( + { + "A": df_bar_data, + "B": df_bar_data[::-1], + "C": df_bar_data[0], + "D": df_bar_data[-1], + } ) - return BSS_df + return df_bar_df -def _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division): +def _df_bar_xyheight_from_ax_helper(df_bar_data, ax, subplot_division): subplot_data_df_list = [] # get xy and height of squares representing data, separated by subplots @@ -705,7 +710,7 @@ def _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division): [ (x.get_x(), x.get_y(), x.get_height()) for x in ax[i].findobj(plt.Rectangle) - if x.get_height() in BSS_data + if x.get_height() in df_bar_data ] ) subplot_data_df_list.append( @@ -715,12 +720,14 @@ def _BSS_xyheight_from_ax_helper(BSS_data, ax, subplot_division): return subplot_data_df_list -def _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df, subplot_columns): +def _df_bar_subplot_checker(df_bar_data, df_bar_df, subplot_data_df, subplot_columns): subplot_sliced_by_source = [ - subplot_data_df.iloc[len(BSS_data) * i : len(BSS_data) * (i + 1)].reset_index() + subplot_data_df.iloc[ + len(df_bar_data) * i : len(df_bar_data) * (i + 1) + ].reset_index() for i in range(len(subplot_columns)) ] - expected_total_height = BSS_df.loc[:, subplot_columns].sum(axis=1) + expected_total_height = df_bar_df.loc[:, subplot_columns].sum(axis=1) for i in range(len(subplot_columns)): sliced_df = subplot_sliced_by_source[i] @@ -745,65 +752,67 @@ def _BSS_subplot_checker(BSS_data, BSS_df, subplot_data_df, subplot_columns): ) -class TestBarSubplotStacked: - # GH Issue 61018 - @pytest.mark.parametrize("columns_used", [["A", "B"], ["C", "D"], ["D", "A"]]) - def test_bar_1_subplot_1_double_stacked(self, BSS_data, BSS_df, columns_used): - BSS_df_trimmed = BSS_df[columns_used] - subplot_division = [columns_used] - ax = BSS_df_trimmed.plot(subplots=subplot_division, kind="bar", stacked=True) - subplot_data_df_list = _BSS_xyheight_from_ax_helper( - BSS_data, ax, subplot_division +# GH Issue 61018 +@pytest.mark.parametrize("columns_used", [["A", "B"], ["C", "D"], ["D", "A"]]) +def test_bar_1_subplot_1_double_stacked(df_bar_data, df_bar_df, columns_used): + df_bar_df_trimmed = df_bar_df[columns_used] + subplot_division = [columns_used] + ax = df_bar_df_trimmed.plot(subplots=subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _df_bar_xyheight_from_ax_helper( + df_bar_data, ax, subplot_division + ) + for i in range(len(subplot_data_df_list)): + _df_bar_subplot_checker( + df_bar_data, df_bar_df_trimmed, subplot_data_df_list[i], subplot_division[i] ) - for i in range(len(subplot_data_df_list)): - _BSS_subplot_checker( - BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i] - ) - @pytest.mark.parametrize( - "columns_used", [["A", "B", "C"], ["A", "C", "B"], ["D", "A", "C"]] + +@pytest.mark.parametrize( + "columns_used", [["A", "B", "C"], ["A", "C", "B"], ["D", "A", "C"]] +) +def test_bar_2_subplot_1_double_stacked(df_bar_data, df_bar_df, columns_used): + df_bar_df_trimmed = df_bar_df[columns_used] + subplot_division = [(columns_used[0], columns_used[1]), (columns_used[2],)] + ax = df_bar_df_trimmed.plot(subplots=subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _df_bar_xyheight_from_ax_helper( + df_bar_data, ax, subplot_division ) - def test_bar_2_subplot_1_double_stacked(self, BSS_data, BSS_df, columns_used): - BSS_df_trimmed = BSS_df[columns_used] - subplot_division = [(columns_used[0], columns_used[1]), (columns_used[2],)] - ax = BSS_df_trimmed.plot(subplots=subplot_division, kind="bar", stacked=True) - subplot_data_df_list = _BSS_xyheight_from_ax_helper( - BSS_data, ax, subplot_division - ) - for i in range(len(subplot_data_df_list)): - _BSS_subplot_checker( - BSS_data, BSS_df_trimmed, subplot_data_df_list[i], subplot_division[i] - ) + for i in range(len(subplot_data_df_list)): + _df_bar_subplot_checker( + df_bar_data, df_bar_df_trimmed, subplot_data_df_list[i], subplot_division[i] + ) - @pytest.mark.parametrize( - "subplot_division", - [ - [("A", "B"), ("C", "D")], - [("A", "D"), ("C", "B")], - [("B", "C"), ("D", "A")], - [("B", "D"), ("C", "A")], - ], + +@pytest.mark.parametrize( + "subplot_division", + [ + [("A", "B"), ("C", "D")], + [("A", "D"), ("C", "B")], + [("B", "C"), ("D", "A")], + [("B", "D"), ("C", "A")], + ], +) +def test_bar_2_subplot_2_double_stacked(df_bar_data, df_bar_df, subplot_division): + ax = df_bar_df.plot(subplots=subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _df_bar_xyheight_from_ax_helper( + df_bar_data, ax, subplot_division ) - def test_bar_2_subplot_2_double_stacked(self, BSS_data, BSS_df, subplot_division): - ax = BSS_df.plot(subplots=subplot_division, kind="bar", stacked=True) - subplot_data_df_list = _BSS_xyheight_from_ax_helper( - BSS_data, ax, subplot_division - ) - for i in range(len(subplot_data_df_list)): - _BSS_subplot_checker( - BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i] - ) + for i in range(len(subplot_data_df_list)): + _df_bar_subplot_checker( + df_bar_data, df_bar_df, subplot_data_df_list[i], subplot_division[i] + ) - @pytest.mark.parametrize( - "subplot_division", - [[("A", "B", "C")], [("A", "D", "B")], [("C", "A", "D")], [("D", "C", "A")]], + +@pytest.mark.parametrize( + "subplot_division", + [[("A", "B", "C")], [("A", "D", "B")], [("C", "A", "D")], [("D", "C", "A")]], +) +def test_bar_2_subplots_1_triple_stacked(df_bar_data, df_bar_df, subplot_division): + ax = df_bar_df.plot(subplots=subplot_division, kind="bar", stacked=True) + subplot_data_df_list = _df_bar_xyheight_from_ax_helper( + df_bar_data, ax, subplot_division ) - def test_bar_2_subplots_1_triple_stacked(self, BSS_data, BSS_df, subplot_division): - ax = BSS_df.plot(subplots=subplot_division, kind="bar", stacked=True) - subplot_data_df_list = _BSS_xyheight_from_ax_helper( - BSS_data, ax, subplot_division - ) - for i in range(len(subplot_data_df_list)): - _BSS_subplot_checker( - BSS_data, BSS_df, subplot_data_df_list[i], subplot_division[i] - ) + for i in range(len(subplot_data_df_list)): + _df_bar_subplot_checker( + df_bar_data, df_bar_df, subplot_data_df_list[i], subplot_division[i] + ) From 9630ee593ba7b35190662c060503730775a22286 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Mon, 28 Apr 2025 19:51:16 +0000 Subject: [PATCH 18/30] Updated '&' to 'and' --- pandas/plotting/_matplotlib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index bd5946dc20065..0e06cb10d2029 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1934,7 +1934,7 @@ def _make_plot(self, fig: Figure) -> None: self.subplots: list[Any] - if bool(self.subplots) & self.stacked: + if bool(self.subplots) and self.stacked: for i, sub_plot in enumerate(self.subplots): if len(sub_plot) <= 1: continue From ab4f7a650d8649a8c6a5348c7599d5fcfce323fb Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Mon, 28 Apr 2025 21:28:41 +0000 Subject: [PATCH 19/30] Updated Test cases --- pandas/tests/plotting/test_misc.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index bf91ba9f9ce93..062e77bbe33b9 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -683,8 +683,17 @@ def test_bar_plt_xaxis_intervalrange(self): ) +def test_plot_bar_label_count_default(): + df = DataFrame( + [(30, 10, 10, 10), (20, 20, 20, 20), (10, 30, 30, 10)], columns=list("ABCD") + ) + df.plot(subplots=True, kind="bar", title=["A", "B", "C", "D"]) + + def test_plot_bar_label_count_expected_fail(): - df = DataFrame([(30, 10, 10), (20, 20, 20), (10, 30, 30)], columns=list("ABC")) + df = DataFrame( + [(30, 10, 10, 10), (20, 20, 20, 20), (10, 30, 30, 10)], columns=list("ABCD") + ) with pytest.raises( ValueError, match="The length of `title` must equal the number of columns " @@ -693,11 +702,12 @@ def test_plot_bar_label_count_expected_fail(): df.plot( subplots=[("A", "B")], kind="bar", - stacked=True, - title=["A&B", "C", "Extra Title"], + title=["A&B", "C", "D", "Extra Title"], ) def test_plot_bar_label_count_expected_success(): - df = DataFrame([(30, 10, 10), (20, 20, 20), (10, 30, 30)], columns=list("ABC")) - df.plot(subplots=[("A", "B")], kind="bar", stacked=True, title=["A&B", "C"]) + df = DataFrame( + [(30, 10, 10, 10), (20, 20, 20, 20), (10, 30, 30, 10)], columns=list("ABCD") + ) + df.plot(subplots=[("A", "B", "D")], kind="bar", title=["A&B&D", "C"]) From abe5c3fbe14f471e87c9e97ea5a2d687e441efaf Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Tue, 29 Apr 2025 07:38:39 +0000 Subject: [PATCH 20/30] Fixed crash when "subplots=True" is used --- pandas/plotting/_matplotlib/core.py | 15 ++++++++------- pandas/tests/plotting/test_misc.py | 18 +++++++++++++++++- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 0e06cb10d2029..b93e75dcd4379 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1934,13 +1934,14 @@ def _make_plot(self, fig: Figure) -> None: self.subplots: list[Any] - if bool(self.subplots) and self.stacked: - for i, sub_plot in enumerate(self.subplots): - if len(sub_plot) <= 1: - continue - for plot in sub_plot: - _stacked_subplots_ind[int(plot)] = i - _stacked_subplots_offsets.append([0, 0]) + if type(self.subplots) != bool: + if bool(self.subplots) and self.stacked: + for i, sub_plot in enumerate(self.subplots): + if len(sub_plot) <= 1: + continue + for plot in sub_plot: + _stacked_subplots_ind[int(plot)] = i + _stacked_subplots_offsets.append([0, 0]) for i, (label, y) in enumerate(self._iter_data(data=data)): ax = self._get_ax(i) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index f4a0f1e792ae6..6431cd75c06eb 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -727,7 +727,11 @@ def _df_bar_subplot_checker(df_bar_data, df_bar_df, subplot_data_df, subplot_col ].reset_index() for i in range(len(subplot_columns)) ] - expected_total_height = df_bar_df.loc[:, subplot_columns].sum(axis=1) + + if len(subplot_columns) == 1: + expected_total_height = df_bar_df.loc[:, subplot_columns] + else: + expected_total_height = df_bar_df.loc[:, subplot_columns].sum(axis=1) for i in range(len(subplot_columns)): sliced_df = subplot_sliced_by_source[i] @@ -816,3 +820,15 @@ def test_bar_2_subplots_1_triple_stacked(df_bar_data, df_bar_df, subplot_divisio _df_bar_subplot_checker( df_bar_data, df_bar_df, subplot_data_df_list[i], subplot_division[i] ) + + +def test_bar_subplots_bool(df_bar_data, df_bar_df): + subplot_division = [("A"), ("B"), ("C"), ("D")] + ax = df_bar_df.plot(subplots=True, kind="bar", stacked=True) + subplot_data_df_list = _df_bar_xyheight_from_ax_helper( + df_bar_data, ax, subplot_division + ) + for i in range(len(subplot_data_df_list)): + _df_bar_subplot_checker( + df_bar_data, df_bar_df, subplot_data_df_list[i], subplot_division[i] + ) From 2e8fc9ac2d17783b63a9a39ca8d3ee9453e330f1 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Fri, 2 May 2025 20:37:31 +0000 Subject: [PATCH 21/30] Title check checks for subplot length if specified --- pandas/plotting/_matplotlib/core.py | 11 ++++++++++- pandas/tests/plotting/test_misc.py | 5 +++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index a217ee8a86a16..8141d41e7f729 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -802,7 +802,16 @@ def _adorn_subplots(self, fig: Figure) -> None: if self.title: if self.subplots: if is_list_like(self.title): - if len(self.title) != self.nseries: + if type(self.subplots) != bool: + if len(self.subplots) != len(self.title): + raise ValueError( + "The length of `title` must equal the number " + "of subplots if `title` of type `list` " + "and subplots is iterable.\n" + f"length of title = {len(self.title)}\n" + f"number of subplots = {len(self.subplots)}" + ) + elif len(self.title) != self.nseries: raise ValueError( "The length of `title` must equal the number " "of columns if using `title` of type `list` " diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 062e77bbe33b9..e9fd1a43ff55e 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -696,8 +696,9 @@ def test_plot_bar_label_count_expected_fail(): ) with pytest.raises( ValueError, - match="The length of `title` must equal the number of columns " - "if using `title` of type `list` and `subplots=True`.", + match="The length of `title` must equal the number " + "of subplots if `title` of type `list` " + "and subplots is iterable.\n", ): df.plot( subplots=[("A", "B")], From 9dd73d1818bb98a72b069d1302d0151507021c82 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Mon, 28 Apr 2025 21:28:41 +0000 Subject: [PATCH 22/30] Updated Test cases --- pandas/tests/plotting/test_misc.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index f4a0f1e792ae6..5392f0a22a5d7 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -683,6 +683,13 @@ def test_bar_plt_xaxis_intervalrange(self): ) +def test_plot_bar_label_count_default(): + df = DataFrame( + [(30, 10, 10, 10), (20, 20, 20, 20), (10, 30, 30, 10)], columns=list("ABCD") + ) + df.plot(subplots=True, kind="bar", title=["A", "B", "C", "D"]) + + @pytest.fixture def df_bar_data(): return np.random.default_rng(3).integers(0, 100, 5) @@ -816,3 +823,24 @@ def test_bar_2_subplots_1_triple_stacked(df_bar_data, df_bar_df, subplot_divisio _df_bar_subplot_checker( df_bar_data, df_bar_df, subplot_data_df_list[i], subplot_division[i] ) +def test_plot_bar_label_count_expected_fail(): + df = DataFrame( + [(30, 10, 10, 10), (20, 20, 20, 20), (10, 30, 30, 10)], columns=list("ABCD") + ) + with pytest.raises( + ValueError, + match="The length of `title` must equal the number of columns " + "if using `title` of type `list` and `subplots=True`.", + ): + df.plot( + subplots=[("A", "B")], + kind="bar", + title=["A&B", "C", "D", "Extra Title"], + ) + + +def test_plot_bar_label_count_expected_success(): + df = DataFrame( + [(30, 10, 10, 10), (20, 20, 20, 20), (10, 30, 30, 10)], columns=list("ABCD") + ) + df.plot(subplots=[("A", "B", "D")], kind="bar", title=["A&B&D", "C"]) From 7e50fa02cd77f8c59bfa5f1963587b32172b7623 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Fri, 2 May 2025 20:37:31 +0000 Subject: [PATCH 23/30] Title check checks for subplot length if specified --- pandas/plotting/_matplotlib/core.py | 11 ++++++++++- pandas/tests/plotting/test_misc.py | 5 +++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 0e06cb10d2029..70c7775758db3 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -802,7 +802,16 @@ def _adorn_subplots(self, fig: Figure) -> None: if self.title: if self.subplots: if is_list_like(self.title): - if len(self.title) != self.nseries: + if type(self.subplots) != bool: + if len(self.subplots) != len(self.title): + raise ValueError( + "The length of `title` must equal the number " + "of subplots if `title` of type `list` " + "and subplots is iterable.\n" + f"length of title = {len(self.title)}\n" + f"number of subplots = {len(self.subplots)}" + ) + elif len(self.title) != self.nseries: raise ValueError( "The length of `title` must equal the number " "of columns if using `title` of type `list` " diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 5392f0a22a5d7..ff849689952a3 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -829,8 +829,9 @@ def test_plot_bar_label_count_expected_fail(): ) with pytest.raises( ValueError, - match="The length of `title` must equal the number of columns " - "if using `title` of type `list` and `subplots=True`.", + match="The length of `title` must equal the number " + "of subplots if `title` of type `list` " + "and subplots is iterable.\n", ): df.plot( subplots=[("A", "B")], From d501e499011418dc8f035c7ae828d1318e0cc86e Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Fri, 2 May 2025 21:01:19 +0000 Subject: [PATCH 24/30] Updated test name --- pandas/tests/plotting/test_misc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 6431cd75c06eb..89dc5a1b57808 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -822,7 +822,7 @@ def test_bar_2_subplots_1_triple_stacked(df_bar_data, df_bar_df, subplot_divisio ) -def test_bar_subplots_bool(df_bar_data, df_bar_df): +def test_bar_subplots__stacking_bool(df_bar_data, df_bar_df): subplot_division = [("A"), ("B"), ("C"), ("D")] ax = df_bar_df.plot(subplots=True, kind="bar", stacked=True) subplot_data_df_list = _df_bar_xyheight_from_ax_helper( From 584a4a9604da4ff26ea4c13f632d8dbda4e75f95 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Fri, 2 May 2025 21:01:52 +0000 Subject: [PATCH 25/30] Removed extra '_' in test name --- pandas/tests/plotting/test_misc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 89dc5a1b57808..7cfc327cc8884 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -822,7 +822,7 @@ def test_bar_2_subplots_1_triple_stacked(df_bar_data, df_bar_df, subplot_divisio ) -def test_bar_subplots__stacking_bool(df_bar_data, df_bar_df): +def test_bar_subplots_stacking_bool(df_bar_data, df_bar_df): subplot_division = [("A"), ("B"), ("C"), ("D")] ax = df_bar_df.plot(subplots=True, kind="bar", stacked=True) subplot_data_df_list = _df_bar_xyheight_from_ax_helper( From 16b6792ebca989fd10eafc327be2ebfd68311c1b Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Fri, 2 May 2025 21:48:24 +0000 Subject: [PATCH 26/30] Fixed issue where expected_total_height returns as a df instead of a series --- pandas/tests/plotting/test_misc.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 7cfc327cc8884..d37aae9483f61 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -729,7 +729,7 @@ def _df_bar_subplot_checker(df_bar_data, df_bar_df, subplot_data_df, subplot_col ] if len(subplot_columns) == 1: - expected_total_height = df_bar_df.loc[:, subplot_columns] + expected_total_height = df_bar_df.loc[:, subplot_columns[0]] else: expected_total_height = df_bar_df.loc[:, subplot_columns].sum(axis=1) @@ -747,7 +747,6 @@ def _df_bar_subplot_checker(df_bar_data, df_bar_df, subplot_data_df, subplot_col tm.assert_series_equal( height_iter, expected_total_height, check_names=False, check_dtype=False ) - else: # Checks each preceding bar ends where the next one starts next_start_coord = subplot_sliced_by_source[i + 1]["y_coord"] From 11c7a8878ef317b3f78bb5e3f6841f90eaf27ee6 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Fri, 2 May 2025 21:55:28 +0000 Subject: [PATCH 27/30] Updated change notes --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 0e779ff470fcc..486652f34e9f6 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -800,6 +800,7 @@ Plotting - Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`) - Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when set both color and a ``dict`` style (:issue:`59461`) - Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`) +- Bug in :meth:`DataFrame.plot` where ``title`` would require more titles than needed when graphing more than one column per subplot. (:issue:`61019`) - Bug in :meth:`Series.plot` preventing a line and bar from being aligned on the same plot (:issue:`61161`) - Bug in :meth:`Series.plot` preventing a line and scatter plot from being aligned (:issue:`61005`) - Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`) From c8959163042a9fb9d6a2a2bc26fdb0e54c93bfe4 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Fri, 2 May 2025 23:49:08 +0000 Subject: [PATCH 28/30] Addressed mypy error --- pandas/plotting/_matplotlib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 37717de8d797a..689b0bc392627 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1943,7 +1943,7 @@ def _make_plot(self, fig: Figure) -> None: self.subplots: list[Any] - if type(self.subplots) != bool: + if not isinstance(self.subplots, bool): if bool(self.subplots) and self.stacked: for i, sub_plot in enumerate(self.subplots): if len(sub_plot) <= 1: From 878e610a24b56652283b09b70c4a0511f004b298 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Tue, 6 May 2025 17:56:08 +0000 Subject: [PATCH 29/30] Addresed PR comments --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/plotting/_matplotlib/core.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 486652f34e9f6..e6debaca71251 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -800,7 +800,7 @@ Plotting - Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`) - Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when set both color and a ``dict`` style (:issue:`59461`) - Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`) -- Bug in :meth:`DataFrame.plot` where ``title`` would require more titles than needed when graphing more than one column per subplot. (:issue:`61019`) +- Bug in :meth:`DataFrame.plot` where ``title`` would require extra titles when plotting more than one column per subplot. (:issue:`61019`) - Bug in :meth:`Series.plot` preventing a line and bar from being aligned on the same plot (:issue:`61161`) - Bug in :meth:`Series.plot` preventing a line and scatter plot from being aligned (:issue:`61005`) - Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 689b0bc392627..7109ab294a114 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -802,14 +802,14 @@ def _adorn_subplots(self, fig: Figure) -> None: if self.title: if self.subplots: if is_list_like(self.title): - if type(self.subplots) != bool: + if not isinstance(self.subplots, bool): if len(self.subplots) != len(self.title): raise ValueError( "The length of `title` must equal the number " "of subplots if `title` of type `list` " "and subplots is iterable.\n" - f"length of title = {len(self.title)}\n" - f"number of subplots = {len(self.subplots)}" + f"The number of titles ({len(self.title)}) must equal " + f"the number of subplots ({len(self.subplots)})." ) elif len(self.title) != self.nseries: raise ValueError( From b93eedb00bce3b60334072cf8ac438b6ef6bbd4c Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Tue, 6 May 2025 21:24:32 +0000 Subject: [PATCH 30/30] Changed exception message raised and updated test case --- pandas/plotting/_matplotlib/core.py | 3 --- pandas/tests/plotting/test_misc.py | 12 ++++++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 7109ab294a114..1c7e1ab57b2a9 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -805,9 +805,6 @@ def _adorn_subplots(self, fig: Figure) -> None: if not isinstance(self.subplots, bool): if len(self.subplots) != len(self.title): raise ValueError( - "The length of `title` must equal the number " - "of subplots if `title` of type `list` " - "and subplots is iterable.\n" f"The number of titles ({len(self.title)}) must equal " f"the number of subplots ({len(self.subplots)})." ) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 5fecb4c9f5bc3..d3e1d7f60384b 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -31,6 +31,8 @@ plt = pytest.importorskip("matplotlib.pyplot") cm = pytest.importorskip("matplotlib.cm") +import re + from pandas.plotting._matplotlib.style import get_standard_colors @@ -844,12 +846,10 @@ def test_plot_bar_label_count_expected_fail(): df = DataFrame( [(30, 10, 10, 10), (20, 20, 20, 20), (10, 30, 30, 10)], columns=list("ABCD") ) - with pytest.raises( - ValueError, - match="The length of `title` must equal the number " - "of subplots if `title` of type `list` " - "and subplots is iterable.\n", - ): + error_regex = re.escape( + "The number of titles (4) must equal the number of subplots (3)." + ) + with pytest.raises(ValueError, match=error_regex): df.plot( subplots=[("A", "B")], kind="bar",