diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cb237b93c70ba..e81aaebe77807 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4562,7 +4562,7 @@ def _count_level(self, level, axis=0, numeric_only=False): level_index = count_axis.levels[level] labels = com._ensure_int64(count_axis.labels[level]) - counts = lib.count_level_2d(mask, labels, len(level_index)) + counts = lib.count_level_2d(mask, labels, len(level_index), axis=0) result = DataFrame(counts, index=level_index, columns=agg_axis) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 43110494d675b..1f5855e63dee8 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -69,7 +69,7 @@ 'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount', 'resample', 'describe', - 'rank', 'quantile', 'count', + 'rank', 'quantile', 'fillna', 'mad', 'any', 'all', @@ -149,9 +149,6 @@ def _last(x): return _last(x) -def _count_compat(x, axis=0): - return x.count() # .size != .count(); count excludes nan - class Grouper(object): """ A Grouper allows the user to specify a groupby instruction for a target object @@ -801,11 +798,6 @@ def size(self): numeric_only=False, _convert=True) last = _groupby_function('last', 'last', _last_compat, numeric_only=False, _convert=True) - _count = _groupby_function('_count', 'count', _count_compat, - numeric_only=False) - - def count(self, axis=0): - return self._count().astype('int64') def ohlc(self): """ @@ -1463,7 +1455,6 @@ def get_group_levels(self): 'f': lambda func, a, b, c, d: func(a, b, c, d, 1) }, 'last': 'group_last', - 'count': 'group_count', } _cython_arity = { @@ -3468,6 +3459,24 @@ def _apply_to_column_groupbys(self, func): in self._iterate_column_groupbys()), keys=self._selected_obj.columns, axis=1) + def count(self): + from functools import partial + from pandas.lib import count_level_2d + from pandas.core.common import _isnull_ndarraylike as isnull + + data, _ = self._get_data_to_aggregate() + ids, _, ngroups = 
self.grouper.group_info + mask = ids != -1 + + val = ((mask & ~isnull(blk.get_values())) for blk in data.blocks) + loc = (blk.mgr_locs for blk in data.blocks) + + counter = partial(count_level_2d, labels=ids, max_bin=ngroups, axis=1) + blk = map(make_block, map(counter, val), loc) + + return self._wrap_agged_blocks(data.items, list(blk)) + + from pandas.tools.plotting import boxplot_frame_groupby DataFrameGroupBy.boxplot = boxplot_frame_groupby diff --git a/pandas/lib.pyx b/pandas/lib.pyx index 7b2d849695c98..2b4974155d44c 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -1253,19 +1253,32 @@ def lookup_values(ndarray[object] values, dict mapping): return maybe_convert_objects(result) +@cython.boundscheck(False) +@cython.wraparound(False) def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, - ndarray[int64_t] labels, Py_ssize_t max_bin): + ndarray[int64_t, ndim=1] labels, + Py_ssize_t max_bin, + int axis): cdef: Py_ssize_t i, j, k, n ndarray[int64_t, ndim=2] counts + assert(axis == 0 or axis == 1) n, k = (<object> mask).shape - counts = np.zeros((max_bin, k), dtype='i8') - for i from 0 <= i < n: - for j from 0 <= j < k: - if mask[i, j]: - counts[labels[i], j] += 1 + if axis == 0: + counts = np.zeros((max_bin, k), dtype='i8') + with nogil: + for i from 0 <= i < n: + for j from 0 <= j < k: + if mask[i, j]: counts[labels[i], j] += 1 + + else: # axis == 1 + counts = np.zeros((n, max_bin), dtype='i8') + with nogil: + for i from 0 <= i < n: + for j from 0 <= j < k: + if mask[i, j]: counts[i, labels[j]] += 1 return counts diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py index c086919d94644..b055d75df4cf4 100644 --- a/pandas/src/generate_code.py +++ b/pandas/src/generate_code.py @@ -971,44 +971,6 @@ def group_var_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, """ -group_count_template = """@cython.boundscheck(False) -@cython.wraparound(False) -def group_count_%(name)s(ndarray[%(dest_type2)s, ndim=2] out, - ndarray[int64_t] counts, - ndarray[%(c_type)s, ndim=2] 
values, - ndarray[int64_t] labels): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, lab, ncounts = len(counts) - Py_ssize_t N = values.shape[0], K = values.shape[1] - %(c_type)s val - ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), - dtype=np.int64) - - if len(values) != len(labels): - raise AssertionError("len(index) != len(labels)") - - - %(nogil)s - %(tab)sfor i in range(N): - %(tab)s lab = labels[i] - %(tab)s if lab < 0: - %(tab)s continue - - %(tab)s counts[lab] += 1 - %(tab)s for j in range(K): - %(tab)s val = values[i, j] - - %(tab)s # not nan - %(tab)s nobs[lab, j] += val == val and val != iNaT - - %(tab)sfor i in range(ncounts): - %(tab)s for j in range(K): - %(tab)s out[i, j] = nobs[i, j] -""" - # add passing bin edges, instead of labels @@ -1995,8 +1957,6 @@ def generate_from_template(template, exclude=None): groupby_min_max = [group_min_template, group_max_template] -groupby_count = [group_count_template] - templates_1d = [map_indices_template, pad_template, backfill_template, @@ -2051,12 +2011,6 @@ def generate_take_cython_file(): print(generate_put_min_max_template(template, use_ints=True), file=f) - for template in groupby_count: - print(generate_put_selection_template(template, use_ints=True, - use_datelikes=True, - use_objects=True), - file=f) - for template in nobool_1d_templates: print(generate_from_template(template, exclude=['bool']), file=f) diff --git a/pandas/src/generated.pyx b/pandas/src/generated.pyx index c0ecd04749e58..2f2fd528999d6 100644 --- a/pandas/src/generated.pyx +++ b/pandas/src/generated.pyx @@ -7930,192 +7930,6 @@ def group_max_int64(ndarray[int64_t, ndim=2] out, out[i, j] = maxx[i, j] -@cython.boundscheck(False) -@cython.wraparound(False) -def group_count_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] labels): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, lab, ncounts = len(counts) - 
Py_ssize_t N = values.shape[0], K = values.shape[1] - float64_t val - ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), - dtype=np.int64) - - if len(values) != len(labels): - raise AssertionError("len(index) != len(labels)") - - - with nogil: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - nobs[lab, j] += val == val and val != iNaT - - for i in range(ncounts): - for j in range(K): - out[i, j] = nobs[i, j] - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_count_float32(ndarray[float32_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float32_t, ndim=2] values, - ndarray[int64_t] labels): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, lab, ncounts = len(counts) - Py_ssize_t N = values.shape[0], K = values.shape[1] - float32_t val - ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), - dtype=np.int64) - - if len(values) != len(labels): - raise AssertionError("len(index) != len(labels)") - - - with nogil: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - nobs[lab, j] += val == val and val != iNaT - - for i in range(ncounts): - for j in range(K): - out[i, j] = nobs[i, j] - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_count_int64(ndarray[int64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[int64_t, ndim=2] values, - ndarray[int64_t] labels): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, lab, ncounts = len(counts) - Py_ssize_t N = values.shape[0], K = values.shape[1] - int64_t val - ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), - dtype=np.int64) - - if len(values) != len(labels): - raise AssertionError("len(index) != len(labels)") - - - with nogil: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - 
for j in range(K): - val = values[i, j] - - # not nan - nobs[lab, j] += val == val and val != iNaT - - for i in range(ncounts): - for j in range(K): - out[i, j] = nobs[i, j] - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_count_object(ndarray[object, ndim=2] out, - ndarray[int64_t] counts, - ndarray[object, ndim=2] values, - ndarray[int64_t] labels): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, lab, ncounts = len(counts) - Py_ssize_t N = values.shape[0], K = values.shape[1] - object val - ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), - dtype=np.int64) - - if len(values) != len(labels): - raise AssertionError("len(index) != len(labels)") - - - - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - nobs[lab, j] += val == val and val != iNaT - - for i in range(ncounts): - for j in range(K): - out[i, j] = nobs[i, j] - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_count_int64(ndarray[int64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[int64_t, ndim=2] values, - ndarray[int64_t] labels): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, lab, ncounts = len(counts) - Py_ssize_t N = values.shape[0], K = values.shape[1] - int64_t val - ndarray[int64_t, ndim=2] nobs = np.zeros((out.shape[0], out.shape[1]), - dtype=np.int64) - - if len(values) != len(labels): - raise AssertionError("len(index) != len(labels)") - - - with nogil: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - nobs[lab, j] += val == val and val != iNaT - - for i in range(ncounts): - for j in range(K): - out[i, j] = nobs[i, j] - - @cython.wraparound(False) @cython.boundscheck(False) def left_join_indexer_unique_float64(ndarray[float64_t] left, diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 
f5693983f1cc1..a85e68602493b 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2481,6 +2481,30 @@ def test_size(self): self.assertEqual(result[key], len(group)) def test_count(self): + from string import ascii_lowercase + n = 1 << 15 + dr = date_range('2015-08-30', periods=n // 10, freq='T') + + df = DataFrame({ + '1st':np.random.choice(list(ascii_lowercase), n), + '2nd':np.random.randint(0, 5, n), + '3rd':np.random.randn(n).round(3), + '4th':np.random.randint(-10, 10, n), + '5th':np.random.choice(dr, n), + '6th':np.random.randn(n).round(3), + '7th':np.random.randn(n).round(3), + '8th':np.random.choice(dr, n) - np.random.choice(dr, 1), + '9th':np.random.choice(list(ascii_lowercase), n)}) + + for col in df.columns.drop(['1st', '2nd', '4th']): + df.loc[np.random.choice(n, n // 10), col] = np.nan + + df['9th'] = df['9th'].astype('category') + + for key in '1st', '2nd', ['1st', '2nd']: + left = df.groupby(key).count() + right = df.groupby(key).apply(DataFrame.count).drop(key, axis=1) + assert_frame_equal(left, right) # GH5610 # count counts non-nulls @@ -4966,7 +4990,7 @@ def test_groupby_whitelist(self): 'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount', 'resample', 'describe', - 'rank', 'quantile', 'count', + 'rank', 'quantile', 'fillna', 'mad', 'any', 'all', @@ -4987,7 +5011,7 @@ def test_groupby_whitelist(self): 'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount', 'resample', 'describe', - 'rank', 'quantile', 'count', + 'rank', 'quantile', 'fillna', 'mad', 'any', 'all', @@ -5253,7 +5277,6 @@ def test__cython_agg_general(self): ('max', np.max), ('first', lambda x: x.iloc[0]), ('last', lambda x: x.iloc[-1]), - ('count', np.size), ] df = DataFrame(np.random.randn(1000)) labels = np.random.randint(0, 50, size=1000).astype(float) @@ -5439,26 +5462,26 @@ def test_first_last_max_min_on_time_data(self): def test_groupby_preserves_sort(self): # Test to ensure that groupby always preserves sort order of original # object. 
Issue #8588 and #9651 - - df = DataFrame({'int_groups':[3,1,0,1,0,3,3,3], - 'string_groups':['z','a','z','a','a','g','g','g'], + + df = DataFrame({'int_groups':[3,1,0,1,0,3,3,3], + 'string_groups':['z','a','z','a','a','g','g','g'], 'ints':[8,7,4,5,2,9,1,1], 'floats':[2.3,5.3,6.2,-2.4,2.2,1.1,1.1,5], 'strings':['z','d','a','e','word','word2','42','47']}) # Try sorting on different types and with different group types - for sort_column in ['ints', 'floats', 'strings', ['ints','floats'], + for sort_column in ['ints', 'floats', 'strings', ['ints','floats'], ['ints','strings']]: - for group_column in ['int_groups', 'string_groups', + for group_column in ['int_groups', 'string_groups', ['int_groups','string_groups']]: df = df.sort_values(by=sort_column) g = df.groupby(group_column) - + def test_sort(x): assert_frame_equal(x, x.sort_values(by=sort_column)) - + g.apply(test_sort) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 0f55f79b8b9b9..df61387734cb3 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -829,7 +829,7 @@ def _check_counts(frame, axis=0): index = frame._get_axis(axis) for i in range(index.nlevels): result = frame.count(axis=axis, level=i) - expected = frame.groupby(axis=axis, level=i).count(axis=axis) + expected = frame.groupby(axis=axis, level=i).count() expected = expected.reindex_like(result).astype('i8') assert_frame_equal(result, expected)