Skip to content

ENH: allow layout for grouped DataFrame histograms #4104

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 25, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ pandas 0.13

- ``HDFStore`` raising an invalid ``TypeError`` rather than ``ValueError`` when appending
with a different block ordering (:issue:`4096`)
- The ``by`` argument now works correctly with the ``layout`` argument
(:issue:`4102`, :issue:`4104`) in ``*.hist`` plotting methods

pandas 0.12
===========
Expand Down
3 changes: 3 additions & 0 deletions doc/source/v0.13.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ Bug Fixes
- ``HDFStore`` raising an invalid ``TypeError`` rather than ``ValueError`` when appending
with a different block ordering (:issue:`4096`)

- The ``by`` argument now works correctly with the ``layout`` argument
(:issue:`4102`, :issue:`4104`) in ``*.hist`` plotting methods

See the :ref:`full release notes
<release>` or issue tracker
on GitHub for a complete list.
77 changes: 59 additions & 18 deletions pandas/tests/test_graphics.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,14 +158,12 @@ def test_bar_linewidth(self):
for r in ax.patches:
self.assert_(r.get_linewidth() == 2)

@slow
def test_rotation(self):
    """Every x tick label reports the angle that was passed as ``rot``."""
    frame = DataFrame(np.random.randn(5, 5))
    axes = frame.plot(rot=30)
    angles = [label.get_rotation() for label in axes.get_xticklabels()]
    for angle in angles:
        self.assert_(angle == 30)

@slow
def test_irregular_datetime(self):
rng = date_range('1/1/2000', '3/1/2000')
rng = rng[[0, 1, 2, 3, 5, 9, 10, 11, 12]]
Expand Down Expand Up @@ -195,6 +193,36 @@ def test_hist(self):
self.assertRaises(ValueError, self.ts.hist, by=self.ts.index,
figure=fig)

@slow
def test_hist_layout(self):
    """``Series.hist`` raises ``ValueError`` for ``layout`` given without ``by``."""
    n = 10
    labels = np.array(['Male', 'Female'])
    df = DataFrame({'gender': labels[random.randint(2, size=n)],
                    'height': random.normal(66, 4, size=n),
                    'weight': random.normal(161, 32, size=n)})
    # both a tuple and a list layout are expected to be refused
    for bad_layout in ((1, 1), [1, 1]):
        self.assertRaises(ValueError, df.height.hist, layout=bad_layout)

@slow
def test_hist_layout_with_by(self):
    """Grouped ``Series.hist`` plots with single-row and single-column layouts."""
    import matplotlib.pyplot as plt
    n = 10
    labels = np.array(['Male', 'Female'])
    df = DataFrame({'gender': labels[random.randint(2, size=n)],
                    'height': random.normal(66, 4, size=n),
                    'weight': random.normal(161, 32, size=n),
                    'category': random.randint(4, size=n)})
    # (column to plot, column to group by, layout)
    cases = (('height', 'gender', (2, 1)),
             ('height', 'gender', (1, 2)),
             ('weight', 'category', (1, 4)),
             ('weight', 'category', (4, 1)))
    for value_col, group_col, grid in cases:
        _check_plot_works(df[value_col].hist, by=df[group_col], layout=grid)
        # close all open figures before moving on to the next case
        plt.close('all')

def test_plot_fails_when_ax_differs_from_figure(self):
from pylab import figure
fig1 = figure()
Expand All @@ -210,7 +238,6 @@ def test_kde(self):
ax = self.ts.plot(kind='kde', logy=True)
self.assert_(ax.get_yscale() == 'log')

@slow
def test_kde_color(self):
_skip_if_no_scipy()
ax = self.ts.plot(kind='kde', logy=True, color='r')
Expand Down Expand Up @@ -257,7 +284,6 @@ def test_partially_invalid_plot_data(self):
for kind in kinds:
self.assertRaises(TypeError, s.plot, kind=kind)

@slow
def test_invalid_kind(self):
s = Series([1, 2])
self.assertRaises(ValueError, s.plot, kind='aasdf')
Expand Down Expand Up @@ -323,7 +349,6 @@ def test_plot(self):
index=index)
_check_plot_works(df.plot, title=u'\u03A3')

@slow
def test_nonnumeric_exclude(self):
import matplotlib.pyplot as plt
plt.close('all')
Expand Down Expand Up @@ -410,10 +435,9 @@ def test_xcompat(self):
lines = ax.get_lines()
self.assert_(isinstance(lines[0].get_xdata(), PeriodIndex))

@slow
def test_unsorted_index(self):
df = DataFrame({'y': range(100)},
index=range(99, -1, -1))
df = DataFrame({'y': np.arange(100)},
index=np.arange(99, -1, -1))
ax = df.plot()
l = ax.get_lines()[0]
rs = l.get_xydata()
Expand Down Expand Up @@ -479,15 +503,13 @@ def test_plot_bar(self):
df = DataFrame({'a': [0, 1], 'b': [1, 0]})
_check_plot_works(df.plot, kind='bar')

@slow
def test_bar_stacked_center(self):
    """First x tick equals the first bar's x plus half its width (GH2157)."""
    frame = DataFrame({'A': [3] * 5, 'B': range(5)}, index=range(5))
    # NOTE(review): ``stacked`` is passed as the string 'True', not the
    # boolean -- kept as in the original; confirm this is intentional.
    axes = frame.plot(kind='bar', stacked='True', grid=True)
    first_tick = axes.xaxis.get_ticklocs()[0]
    first_bar = axes.patches[0]
    self.assertEqual(first_tick,
                     first_bar.get_x() + first_bar.get_width() / 2)

@slow
def test_bar_center(self):
df = DataFrame({'A': [3] * 5, 'B': range(5)}, index=range(5))
ax = df.plot(kind='bar', grid=True)
Expand Down Expand Up @@ -710,7 +732,6 @@ def test_plot_int_columns(self):
df = DataFrame(np.random.randn(100, 4)).cumsum()
_check_plot_works(df.plot, legend=True)

@slow
def test_legend_name(self):
multi = DataFrame(np.random.randn(4, 4),
columns=[np.array(['a', 'a', 'b', 'b']),
Expand Down Expand Up @@ -800,7 +821,6 @@ def test_line_colors(self):
plt.close('all')
df.ix[:, [0]].plot(color='DodgerBlue')

@slow
def test_default_color_cycle(self):
import matplotlib.pyplot as plt
plt.rcParams['axes.color_cycle'] = list('rgbk')
Expand All @@ -815,7 +835,6 @@ def test_default_color_cycle(self):
rs = l.get_color()
self.assert_(xp == rs)

@slow
def test_unordered_ts(self):
df = DataFrame(np.array([3.0, 2.0, 1.0]),
index=[date(2012, 10, 1),
Expand All @@ -828,7 +847,6 @@ def test_unordered_ts(self):
ydata = ax.lines[0].get_ydata()
self.assert_(np.all(ydata == np.array([1.0, 2.0, 3.0])))

@slow
def test_all_invalid_plot_data(self):
kinds = 'line', 'bar', 'barh', 'kde', 'density'
df = DataFrame(list('abcd'))
Expand All @@ -843,7 +861,6 @@ def test_partially_invalid_plot_data(self):
for kind in kinds:
self.assertRaises(TypeError, df.plot, kind=kind)

@slow
def test_invalid_kind(self):
df = DataFrame(np.random.randn(10, 2))
self.assertRaises(ValueError, df.plot, kind='aasdf')
Expand Down Expand Up @@ -880,7 +897,6 @@ def test_boxplot(self):
_check_plot_works(grouped.boxplot)
_check_plot_works(grouped.boxplot, subplots=False)

@slow
def test_series_plot_color_kwargs(self):
# #1890
import matplotlib.pyplot as plt
Expand All @@ -890,7 +906,6 @@ def test_series_plot_color_kwargs(self):
line = ax.get_lines()[0]
self.assert_(line.get_color() == 'green')

@slow
def test_time_series_plot_color_kwargs(self):
# #1890
import matplotlib.pyplot as plt
Expand All @@ -901,7 +916,6 @@ def test_time_series_plot_color_kwargs(self):
line = ax.get_lines()[0]
self.assert_(line.get_color() == 'green')

@slow
def test_time_series_plot_color_with_empty_kwargs(self):
import matplotlib.pyplot as plt

Expand Down Expand Up @@ -950,6 +964,33 @@ def test_grouped_hist(self):
self.assertRaises(AttributeError, plotting.grouped_hist, df.A,
by=df.C, foo='bar')

@slow
def test_grouped_hist_layout(self):
    """``DataFrame.hist`` with ``by``: bad layouts raise, good ones set the shape."""
    import matplotlib.pyplot as plt
    n = 100
    labels = np.array(['Male', 'Female'])
    df = DataFrame({'gender': labels[random.randint(2, size=n)],
                    'height': random.normal(66, 4, size=n),
                    'weight': random.normal(161, 32, size=n),
                    'category': random.randint(4, size=n)})

    # (column, grouping column, layout) combinations expected to raise
    rejected = (('weight', 'gender', (1, 1)),
                ('weight', 'gender', (1,)),
                ('height', 'category', (1, 3)),
                ('height', 'category', (2, 1)))
    for column, group_col, grid in rejected:
        self.assertRaises(ValueError, df.hist, column=column,
                          by=df[group_col], layout=grid)

    # accepted layouts: check the shape of the returned axes array
    axes = df.hist(column='height', by=df.gender, layout=(2, 1))
    self.assertEqual(axes.shape, (2,))
    plt.close('all')

    axes = df.hist(column='height', by=df.category, layout=(4, 1))
    self.assertEqual(axes.shape, (4,))
    plt.close('all')

    axes = df.hist(column='height', by=df.category, layout=(4, 2))
    self.assertEqual(axes.shape, (4, 2))

@slow
def test_axis_shared(self):
# GH4089
Expand Down
12 changes: 9 additions & 3 deletions pandas/tools/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -1929,9 +1929,9 @@ def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None,
data = data[column]

if by is not None:

axes = grouped_hist(data, by=by, ax=ax, grid=grid, figsize=figsize,
sharex=sharex, sharey=sharey, **kwds)
sharex=sharex, sharey=sharey, layout=layout,
**kwds)

for ax in axes.ravel():
if xlabelsize is not None:
Expand Down Expand Up @@ -2030,6 +2030,9 @@ def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None,
fig.set_size_inches(*figsize, forward=True)

if by is None:
if kwds.get('layout', None):
raise ValueError("The 'layout' keyword is not supported when "
"'by' is None")
if ax is None:
ax = fig.add_subplot(111)
if ax.get_figure() != fig:
Expand Down Expand Up @@ -2146,9 +2149,12 @@ def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True,
grouped = grouped[column]

ngroups = len(grouped)

nrows, ncols = layout or _get_layout(ngroups)

# A grid with fewer cells than groups cannot hold one subplot per group.
if nrows * ncols < ngroups:
    raise ValueError("Number of plots in 'layout' must be greater than or "
                     "equal to the number of groups in 'by'")

if figsize is None:
# our favorite default beating matplotlib's idea of the
# default size
Expand Down