Skip to content

Commit 92d7cf7

Browse files
committed
Merge pull request #9136 from qwhelan/master
Remove codepath asymmetry in dataframe count()
2 parents 9b453e0 + 49e27cc commit 92d7cf7

File tree

3 files changed

+26
-12
lines changed

3 files changed

+26
-12
lines changed

doc/source/whatsnew/v0.16.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ Performance
6565
- ``DataFrame.to_json`` 30x performance improvement for mixed dtype frames. (:issue:`9037`)
6666
- Performance improvements in ``MultiIndex.duplicated`` by working with labels instead of values (:issue:`9125`)
6767
- Improved the speed of ``nunique`` by calling ``unique`` instead of ``value_counts`` (:issue:`9129`, :issue:`7771`)
68+
- Performance improvement of up to 10x in ``DataFrame.count`` and ``DataFrame.dropna`` by taking advantage of homogeneous/heterogeneous dtypes appropriately (:issue:`9136`)
6869

6970
Bug Fixes
7071
~~~~~~~~~

pandas/core/frame.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4094,11 +4094,11 @@ def count(self, axis=0, level=None, numeric_only=False):
40944094
if len(frame._get_axis(axis)) == 0:
40954095
result = Series(0, index=frame._get_agg_axis(axis))
40964096
else:
4097-
if axis == 1:
4098-
counts = notnull(frame.values).sum(1)
4099-
result = Series(counts, index=frame._get_agg_axis(axis))
4100-
else:
4097+
if frame._is_mixed_type:
41014098
result = notnull(frame).sum(axis=axis)
4099+
else:
4100+
counts = notnull(frame.values).sum(axis=axis)
4101+
result = Series(counts, index=frame._get_agg_axis(axis))
41024102

41034103
return result.astype('int64')
41044104

vb_suite/frame_methods.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -290,30 +290,43 @@ def f(K=100):
290290
start_date=datetime(2012,1,1))
291291

292292
## dropna
293-
setup = common_setup + """
293+
dropna_setup = common_setup + """
294294
data = np.random.randn(10000, 1000)
295295
df = DataFrame(data)
296296
df.ix[50:1000,20:50] = np.nan
297297
df.ix[2000:3000] = np.nan
298298
df.ix[:,60:70] = np.nan
299299
"""
300-
frame_dropna_axis0_any = Benchmark('df.dropna(how="any",axis=0)', setup,
300+
frame_dropna_axis0_any = Benchmark('df.dropna(how="any",axis=0)', dropna_setup,
301301
start_date=datetime(2012,1,1))
302-
frame_dropna_axis0_all = Benchmark('df.dropna(how="all",axis=0)', setup,
302+
frame_dropna_axis0_all = Benchmark('df.dropna(how="all",axis=0)', dropna_setup,
303303
start_date=datetime(2012,1,1))
304304

305-
setup = common_setup + """
305+
frame_dropna_axis1_any = Benchmark('df.dropna(how="any",axis=1)', dropna_setup,
306+
start_date=datetime(2012,1,1))
307+
308+
frame_dropna_axis1_all = Benchmark('df.dropna(how="all",axis=1)', dropna_setup,
309+
start_date=datetime(2012,1,1))
310+
311+
# dropna on mixed dtypes
312+
dropna_mixed_setup = common_setup + """
306313
data = np.random.randn(10000, 1000)
307314
df = DataFrame(data)
308315
df.ix[50:1000,20:50] = np.nan
309316
df.ix[2000:3000] = np.nan
310317
df.ix[:,60:70] = np.nan
318+
df['foo'] = 'bar'
311319
"""
312-
frame_dropna_axis1_any = Benchmark('df.dropna(how="any",axis=1)', setup,
313-
start_date=datetime(2012,1,1))
320+
frame_dropna_axis0_any_mixed_dtypes = Benchmark('df.dropna(how="any",axis=0)', dropna_mixed_setup,
321+
start_date=datetime(2012,1,1))
322+
frame_dropna_axis0_all_mixed_dtypes = Benchmark('df.dropna(how="all",axis=0)', dropna_mixed_setup,
323+
start_date=datetime(2012,1,1))
314324

315-
frame_dropna_axis1_all = Benchmark('df.dropna(how="all",axis=1)', setup,
316-
start_date=datetime(2012,1,1))
325+
frame_dropna_axis1_any_mixed_dtypes = Benchmark('df.dropna(how="any",axis=1)', dropna_mixed_setup,
326+
start_date=datetime(2012,1,1))
327+
328+
frame_dropna_axis1_all_mixed_dtypes = Benchmark('df.dropna(how="all",axis=1)', dropna_mixed_setup,
329+
start_date=datetime(2012,1,1))
317330

318331

319332
#----------------------------------------------------------------------

0 commit comments

Comments
 (0)