Skip to content

Commit 2f88dca

Browse files
committed
Merge pull request #4104 from cpcloud/layout-with-by
ENH: allow layout for grouped DataFrame histograms
2 parents ebf9147 + d80d945 commit 2f88dca

File tree

4 files changed

+73
-21
lines changed

4 files changed

+73
-21
lines changed

doc/source/release.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ pandas 0.13
4646

4747
- ``HDFStore`` raising an invalid ``TypeError`` rather than ``ValueError`` when appending
4848
with a different block ordering (:issue:`4096`)
49+
- The ``by`` argument now works correctly with the ``layout`` argument
50+
in ``*.hist`` plotting methods (:issue:`4102`, :issue:`4014`)
4951

5052
pandas 0.12
5153
===========

doc/source/v0.13.0.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ Bug Fixes
2121
- ``HDFStore`` raising an invalid ``TypeError`` rather than ``ValueError`` when appending
2222
with a different block ordering (:issue:`4096`)
2323

24+
- The ``by`` argument now works correctly with the ``layout`` argument
25+
in ``*.hist`` plotting methods (:issue:`4102`, :issue:`4014`)
26+
2427
See the :ref:`full release notes
2528
<release>` or issue tracker
2629
on GitHub for a complete list.

pandas/tests/test_graphics.py

Lines changed: 59 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -158,14 +158,12 @@ def test_bar_linewidth(self):
158158
for r in ax.patches:
159159
self.assert_(r.get_linewidth() == 2)
160160

161-
@slow
162161
def test_rotation(self):
163162
df = DataFrame(np.random.randn(5, 5))
164163
ax = df.plot(rot=30)
165164
for l in ax.get_xticklabels():
166165
self.assert_(l.get_rotation() == 30)
167166

168-
@slow
169167
def test_irregular_datetime(self):
170168
rng = date_range('1/1/2000', '3/1/2000')
171169
rng = rng[[0, 1, 2, 3, 5, 9, 10, 11, 12]]
@@ -195,6 +193,36 @@ def test_hist(self):
195193
self.assertRaises(ValueError, self.ts.hist, by=self.ts.index,
196194
figure=fig)
197195

196+
@slow
197+
def test_hist_layout(self):
198+
n = 10
199+
df = DataFrame({'gender': np.array(['Male',
200+
'Female'])[random.randint(2,
201+
size=n)],
202+
'height': random.normal(66, 4, size=n), 'weight':
203+
random.normal(161, 32, size=n)})
204+
self.assertRaises(ValueError, df.height.hist, layout=(1, 1))
205+
self.assertRaises(ValueError, df.height.hist, layout=[1, 1])
206+
207+
@slow
208+
def test_hist_layout_with_by(self):
209+
import matplotlib.pyplot as plt
210+
n = 10
211+
df = DataFrame({'gender': np.array(['Male',
212+
'Female'])[random.randint(2,
213+
size=n)],
214+
'height': random.normal(66, 4, size=n), 'weight':
215+
random.normal(161, 32, size=n),
216+
'category': random.randint(4, size=n)})
217+
_check_plot_works(df.height.hist, by=df.gender, layout=(2, 1))
218+
plt.close('all')
219+
_check_plot_works(df.height.hist, by=df.gender, layout=(1, 2))
220+
plt.close('all')
221+
_check_plot_works(df.weight.hist, by=df.category, layout=(1, 4))
222+
plt.close('all')
223+
_check_plot_works(df.weight.hist, by=df.category, layout=(4, 1))
224+
plt.close('all')
225+
198226
def test_plot_fails_when_ax_differs_from_figure(self):
199227
from pylab import figure
200228
fig1 = figure()
@@ -210,7 +238,6 @@ def test_kde(self):
210238
ax = self.ts.plot(kind='kde', logy=True)
211239
self.assert_(ax.get_yscale() == 'log')
212240

213-
@slow
214241
def test_kde_color(self):
215242
_skip_if_no_scipy()
216243
ax = self.ts.plot(kind='kde', logy=True, color='r')
@@ -257,7 +284,6 @@ def test_partially_invalid_plot_data(self):
257284
for kind in kinds:
258285
self.assertRaises(TypeError, s.plot, kind=kind)
259286

260-
@slow
261287
def test_invalid_kind(self):
262288
s = Series([1, 2])
263289
self.assertRaises(ValueError, s.plot, kind='aasdf')
@@ -323,7 +349,6 @@ def test_plot(self):
323349
index=index)
324350
_check_plot_works(df.plot, title=u'\u03A3')
325351

326-
@slow
327352
def test_nonnumeric_exclude(self):
328353
import matplotlib.pyplot as plt
329354
plt.close('all')
@@ -410,10 +435,9 @@ def test_xcompat(self):
410435
lines = ax.get_lines()
411436
self.assert_(isinstance(lines[0].get_xdata(), PeriodIndex))
412437

413-
@slow
414438
def test_unsorted_index(self):
415-
df = DataFrame({'y': range(100)},
416-
index=range(99, -1, -1))
439+
df = DataFrame({'y': np.arange(100)},
440+
index=np.arange(99, -1, -1))
417441
ax = df.plot()
418442
l = ax.get_lines()[0]
419443
rs = l.get_xydata()
@@ -479,15 +503,13 @@ def test_plot_bar(self):
479503
df = DataFrame({'a': [0, 1], 'b': [1, 0]})
480504
_check_plot_works(df.plot, kind='bar')
481505

482-
@slow
483506
def test_bar_stacked_center(self):
484507
# GH2157
485508
df = DataFrame({'A': [3] * 5, 'B': range(5)}, index=range(5))
486509
ax = df.plot(kind='bar', stacked='True', grid=True)
487510
self.assertEqual(ax.xaxis.get_ticklocs()[0],
488511
ax.patches[0].get_x() + ax.patches[0].get_width() / 2)
489512

490-
@slow
491513
def test_bar_center(self):
492514
df = DataFrame({'A': [3] * 5, 'B': range(5)}, index=range(5))
493515
ax = df.plot(kind='bar', grid=True)
@@ -710,7 +732,6 @@ def test_plot_int_columns(self):
710732
df = DataFrame(np.random.randn(100, 4)).cumsum()
711733
_check_plot_works(df.plot, legend=True)
712734

713-
@slow
714735
def test_legend_name(self):
715736
multi = DataFrame(np.random.randn(4, 4),
716737
columns=[np.array(['a', 'a', 'b', 'b']),
@@ -800,7 +821,6 @@ def test_line_colors(self):
800821
plt.close('all')
801822
df.ix[:, [0]].plot(color='DodgerBlue')
802823

803-
@slow
804824
def test_default_color_cycle(self):
805825
import matplotlib.pyplot as plt
806826
plt.rcParams['axes.color_cycle'] = list('rgbk')
@@ -815,7 +835,6 @@ def test_default_color_cycle(self):
815835
rs = l.get_color()
816836
self.assert_(xp == rs)
817837

818-
@slow
819838
def test_unordered_ts(self):
820839
df = DataFrame(np.array([3.0, 2.0, 1.0]),
821840
index=[date(2012, 10, 1),
@@ -828,7 +847,6 @@ def test_unordered_ts(self):
828847
ydata = ax.lines[0].get_ydata()
829848
self.assert_(np.all(ydata == np.array([1.0, 2.0, 3.0])))
830849

831-
@slow
832850
def test_all_invalid_plot_data(self):
833851
kinds = 'line', 'bar', 'barh', 'kde', 'density'
834852
df = DataFrame(list('abcd'))
@@ -843,7 +861,6 @@ def test_partially_invalid_plot_data(self):
843861
for kind in kinds:
844862
self.assertRaises(TypeError, df.plot, kind=kind)
845863

846-
@slow
847864
def test_invalid_kind(self):
848865
df = DataFrame(np.random.randn(10, 2))
849866
self.assertRaises(ValueError, df.plot, kind='aasdf')
@@ -880,7 +897,6 @@ def test_boxplot(self):
880897
_check_plot_works(grouped.boxplot)
881898
_check_plot_works(grouped.boxplot, subplots=False)
882899

883-
@slow
884900
def test_series_plot_color_kwargs(self):
885901
# #1890
886902
import matplotlib.pyplot as plt
@@ -890,7 +906,6 @@ def test_series_plot_color_kwargs(self):
890906
line = ax.get_lines()[0]
891907
self.assert_(line.get_color() == 'green')
892908

893-
@slow
894909
def test_time_series_plot_color_kwargs(self):
895910
# #1890
896911
import matplotlib.pyplot as plt
@@ -901,7 +916,6 @@ def test_time_series_plot_color_kwargs(self):
901916
line = ax.get_lines()[0]
902917
self.assert_(line.get_color() == 'green')
903918

904-
@slow
905919
def test_time_series_plot_color_with_empty_kwargs(self):
906920
import matplotlib.pyplot as plt
907921

@@ -950,6 +964,33 @@ def test_grouped_hist(self):
950964
self.assertRaises(AttributeError, plotting.grouped_hist, df.A,
951965
by=df.C, foo='bar')
952966

967+
@slow
968+
def test_grouped_hist_layout(self):
969+
import matplotlib.pyplot as plt
970+
n = 100
971+
df = DataFrame({'gender': np.array(['Male',
972+
'Female'])[random.randint(2,
973+
size=n)],
974+
'height': random.normal(66, 4, size=n),
975+
'weight': random.normal(161, 32, size=n),
976+
'category': random.randint(4, size=n)})
977+
self.assertRaises(ValueError, df.hist, column='weight', by=df.gender,
978+
layout=(1, 1))
979+
self.assertRaises(ValueError, df.hist, column='weight', by=df.gender,
980+
layout=(1,))
981+
self.assertRaises(ValueError, df.hist, column='height', by=df.category,
982+
layout=(1, 3))
983+
self.assertRaises(ValueError, df.hist, column='height', by=df.category,
984+
layout=(2, 1))
985+
self.assertEqual(df.hist(column='height', by=df.gender,
986+
layout=(2, 1)).shape, (2,))
987+
plt.close('all')
988+
self.assertEqual(df.hist(column='height', by=df.category,
989+
layout=(4, 1)).shape, (4,))
990+
plt.close('all')
991+
self.assertEqual(df.hist(column='height', by=df.category,
992+
layout=(4, 2)).shape, (4, 2))
993+
953994
@slow
954995
def test_axis_shared(self):
955996
# GH4089

pandas/tools/plotting.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1929,9 +1929,9 @@ def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None,
19291929
data = data[column]
19301930

19311931
if by is not None:
1932-
19331932
axes = grouped_hist(data, by=by, ax=ax, grid=grid, figsize=figsize,
1934-
sharex=sharex, sharey=sharey, **kwds)
1933+
sharex=sharex, sharey=sharey, layout=layout,
1934+
**kwds)
19351935

19361936
for ax in axes.ravel():
19371937
if xlabelsize is not None:
@@ -2030,6 +2030,9 @@ def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None,
20302030
fig.set_size_inches(*figsize, forward=True)
20312031

20322032
if by is None:
2033+
if kwds.get('layout', None):
2034+
raise ValueError("The 'layout' keyword is not supported when "
2035+
"'by' is None")
20332036
if ax is None:
20342037
ax = fig.add_subplot(111)
20352038
if ax.get_figure() != fig:
@@ -2146,9 +2149,12 @@ def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True,
21462149
grouped = grouped[column]
21472150

21482151
ngroups = len(grouped)
2149-
21502152
nrows, ncols = layout or _get_layout(ngroups)
21512153

2154+
if nrows * ncols < ngroups:
2155+
raise ValueError("Number of plots in 'layout' must greater than or "
2156+
"equal to the number " "of groups in 'by'")
2157+
21522158
if figsize is None:
21532159
# our favorite default beating matplotlib's idea of the
21542160
# default size

0 commit comments

Comments
 (0)