From d80d9459b101217bf7879add117ab5a439cf6589 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Tue, 2 Jul 2013 11:55:51 -0400 Subject: [PATCH] ENH: allow layout for grouped DataFrame histograms Also remove the slow decorator for tests that take < 100 ms --- doc/source/release.rst | 2 + doc/source/v0.13.0.txt | 3 ++ pandas/tests/test_graphics.py | 77 +++++++++++++++++++++++++++-------- pandas/tools/plotting.py | 12 ++++-- 4 files changed, 73 insertions(+), 21 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 54fa4d30bac0a..7c09c2a6f16ac 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -46,6 +46,8 @@ pandas 0.13 - ``HDFStore`` raising an invalid ``TypeError`` rather than ``ValueError`` when appending with a different block ordering (:issue:`4096`) + - The ``by`` argument now works correctly with the ``layout`` argument + (:issue:`4102`, :issue:`4014`) in ``*.hist`` plotting methods pandas 0.12 =========== diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index 52bd674cb7830..7f63c545c5664 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -21,6 +21,9 @@ Bug Fixes - ``HDFStore`` raising an invalid ``TypeError`` rather than ``ValueError`` when appending with a different block ordering (:issue:`4096`) + - The ``by`` argument now works correctly with the ``layout`` argument + (:issue:`4102`, :issue:`4014`) in ``*.hist`` plotting methods + See the :ref:`full release notes <release>` or issue tracker on GitHub for a complete list. 
diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 1b7052bf62824..08b42d7cf8975 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -158,14 +158,12 @@ def test_bar_linewidth(self): for r in ax.patches: self.assert_(r.get_linewidth() == 2) - @slow def test_rotation(self): df = DataFrame(np.random.randn(5, 5)) ax = df.plot(rot=30) for l in ax.get_xticklabels(): self.assert_(l.get_rotation() == 30) - @slow def test_irregular_datetime(self): rng = date_range('1/1/2000', '3/1/2000') rng = rng[[0, 1, 2, 3, 5, 9, 10, 11, 12]] @@ -195,6 +193,36 @@ def test_hist(self): self.assertRaises(ValueError, self.ts.hist, by=self.ts.index, figure=fig) + @slow + def test_hist_layout(self): + n = 10 + df = DataFrame({'gender': np.array(['Male', + 'Female'])[random.randint(2, + size=n)], + 'height': random.normal(66, 4, size=n), 'weight': + random.normal(161, 32, size=n)}) + self.assertRaises(ValueError, df.height.hist, layout=(1, 1)) + self.assertRaises(ValueError, df.height.hist, layout=[1, 1]) + + @slow + def test_hist_layout_with_by(self): + import matplotlib.pyplot as plt + n = 10 + df = DataFrame({'gender': np.array(['Male', + 'Female'])[random.randint(2, + size=n)], + 'height': random.normal(66, 4, size=n), 'weight': + random.normal(161, 32, size=n), + 'category': random.randint(4, size=n)}) + _check_plot_works(df.height.hist, by=df.gender, layout=(2, 1)) + plt.close('all') + _check_plot_works(df.height.hist, by=df.gender, layout=(1, 2)) + plt.close('all') + _check_plot_works(df.weight.hist, by=df.category, layout=(1, 4)) + plt.close('all') + _check_plot_works(df.weight.hist, by=df.category, layout=(4, 1)) + plt.close('all') + def test_plot_fails_when_ax_differs_from_figure(self): from pylab import figure fig1 = figure() @@ -210,7 +238,6 @@ def test_kde(self): ax = self.ts.plot(kind='kde', logy=True) self.assert_(ax.get_yscale() == 'log') - @slow def test_kde_color(self): _skip_if_no_scipy() ax = self.ts.plot(kind='kde', 
logy=True, color='r') @@ -257,7 +284,6 @@ def test_partially_invalid_plot_data(self): for kind in kinds: self.assertRaises(TypeError, s.plot, kind=kind) - @slow def test_invalid_kind(self): s = Series([1, 2]) self.assertRaises(ValueError, s.plot, kind='aasdf') @@ -323,7 +349,6 @@ def test_plot(self): index=index) _check_plot_works(df.plot, title=u'\u03A3') - @slow def test_nonnumeric_exclude(self): import matplotlib.pyplot as plt plt.close('all') @@ -410,10 +435,9 @@ def test_xcompat(self): lines = ax.get_lines() self.assert_(isinstance(lines[0].get_xdata(), PeriodIndex)) - @slow def test_unsorted_index(self): - df = DataFrame({'y': range(100)}, - index=range(99, -1, -1)) + df = DataFrame({'y': np.arange(100)}, + index=np.arange(99, -1, -1)) ax = df.plot() l = ax.get_lines()[0] rs = l.get_xydata() @@ -479,7 +503,6 @@ def test_plot_bar(self): df = DataFrame({'a': [0, 1], 'b': [1, 0]}) _check_plot_works(df.plot, kind='bar') - @slow def test_bar_stacked_center(self): # GH2157 df = DataFrame({'A': [3] * 5, 'B': range(5)}, index=range(5)) @@ -487,7 +510,6 @@ def test_bar_stacked_center(self): self.assertEqual(ax.xaxis.get_ticklocs()[0], ax.patches[0].get_x() + ax.patches[0].get_width() / 2) - @slow def test_bar_center(self): df = DataFrame({'A': [3] * 5, 'B': range(5)}, index=range(5)) ax = df.plot(kind='bar', grid=True) @@ -710,7 +732,6 @@ def test_plot_int_columns(self): df = DataFrame(np.random.randn(100, 4)).cumsum() _check_plot_works(df.plot, legend=True) - @slow def test_legend_name(self): multi = DataFrame(np.random.randn(4, 4), columns=[np.array(['a', 'a', 'b', 'b']), @@ -800,7 +821,6 @@ def test_line_colors(self): plt.close('all') df.ix[:, [0]].plot(color='DodgerBlue') - @slow def test_default_color_cycle(self): import matplotlib.pyplot as plt plt.rcParams['axes.color_cycle'] = list('rgbk') @@ -815,7 +835,6 @@ def test_default_color_cycle(self): rs = l.get_color() self.assert_(xp == rs) - @slow def test_unordered_ts(self): df = DataFrame(np.array([3.0, 2.0, 
1.0]), index=[date(2012, 10, 1), @@ -828,7 +847,6 @@ def test_unordered_ts(self): ydata = ax.lines[0].get_ydata() self.assert_(np.all(ydata == np.array([1.0, 2.0, 3.0]))) - @slow def test_all_invalid_plot_data(self): kinds = 'line', 'bar', 'barh', 'kde', 'density' df = DataFrame(list('abcd')) @@ -843,7 +861,6 @@ def test_partially_invalid_plot_data(self): for kind in kinds: self.assertRaises(TypeError, df.plot, kind=kind) - @slow def test_invalid_kind(self): df = DataFrame(np.random.randn(10, 2)) self.assertRaises(ValueError, df.plot, kind='aasdf') @@ -880,7 +897,6 @@ def test_boxplot(self): _check_plot_works(grouped.boxplot) _check_plot_works(grouped.boxplot, subplots=False) - @slow def test_series_plot_color_kwargs(self): # #1890 import matplotlib.pyplot as plt @@ -890,7 +906,6 @@ def test_series_plot_color_kwargs(self): line = ax.get_lines()[0] self.assert_(line.get_color() == 'green') - @slow def test_time_series_plot_color_kwargs(self): # #1890 import matplotlib.pyplot as plt @@ -901,7 +916,6 @@ def test_time_series_plot_color_kwargs(self): line = ax.get_lines()[0] self.assert_(line.get_color() == 'green') - @slow def test_time_series_plot_color_with_empty_kwargs(self): import matplotlib.pyplot as plt @@ -950,6 +964,33 @@ def test_grouped_hist(self): self.assertRaises(AttributeError, plotting.grouped_hist, df.A, by=df.C, foo='bar') + @slow + def test_grouped_hist_layout(self): + import matplotlib.pyplot as plt + n = 100 + df = DataFrame({'gender': np.array(['Male', + 'Female'])[random.randint(2, + size=n)], + 'height': random.normal(66, 4, size=n), + 'weight': random.normal(161, 32, size=n), + 'category': random.randint(4, size=n)}) + self.assertRaises(ValueError, df.hist, column='weight', by=df.gender, + layout=(1, 1)) + self.assertRaises(ValueError, df.hist, column='weight', by=df.gender, + layout=(1,)) + self.assertRaises(ValueError, df.hist, column='height', by=df.category, + layout=(1, 3)) + self.assertRaises(ValueError, df.hist, column='height', 
by=df.category, + layout=(2, 1)) + self.assertEqual(df.hist(column='height', by=df.gender, + layout=(2, 1)).shape, (2,)) + plt.close('all') + self.assertEqual(df.hist(column='height', by=df.category, + layout=(4, 1)).shape, (4,)) + plt.close('all') + self.assertEqual(df.hist(column='height', by=df.category, + layout=(4, 2)).shape, (4, 2)) + @slow def test_axis_shared(self): # GH4089 diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index ad305382dd8cc..1ffdf83b02763 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -1929,9 +1929,9 @@ def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, data = data[column] if by is not None: - axes = grouped_hist(data, by=by, ax=ax, grid=grid, figsize=figsize, - sharex=sharex, sharey=sharey, **kwds) + sharex=sharex, sharey=sharey, layout=layout, + **kwds) for ax in axes.ravel(): if xlabelsize is not None: @@ -2030,6 +2030,9 @@ def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, fig.set_size_inches(*figsize, forward=True) if by is None: + if kwds.get('layout', None): + raise ValueError("The 'layout' keyword is not supported when " + "'by' is None") if ax is None: ax = fig.add_subplot(111) if ax.get_figure() != fig: @@ -2146,9 +2149,12 @@ def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True, grouped = grouped[column] ngroups = len(grouped) - nrows, ncols = layout or _get_layout(ngroups) + if nrows * ncols < ngroups: + raise ValueError("Number of plots in 'layout' must greater than or " + "equal to the number " "of groups in 'by'") + if figsize is None: # our favorite default beating matplotlib's idea of the # default size