From 13be18ed75ade74e8270c692f956c6cb274baaa5 Mon Sep 17 00:00:00 2001
From: tv3141 <tv3141@users.noreply.github.com>
Date: Wed, 18 Mar 2020 21:56:00 +0000
Subject: [PATCH 01/12] Enable bounds checking.

---
 pandas/_libs/lib.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index dc2e8c097bc14..2a123f4c2028f 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -775,7 +775,7 @@ def get_level_sorter(const int64_t[:] label, const int64_t[:] starts):
     return out
 
 
-@cython.boundscheck(False)
+@cython.boundscheck(True)
 @cython.wraparound(False)
 def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
                    const int64_t[:] labels,

From 36513c3fe91c241f794a0c1fd0cf9040dc844859 Mon Sep 17 00:00:00 2001
From: tv3141 <tv3141@users.noreply.github.com>
Date: Wed, 18 Mar 2020 21:52:05 +0000
Subject: [PATCH 02/12] Add failing tests.

---
 pandas/tests/groupby/test_counting.py | 55 ++++++++++++++++++++++++++-
 1 file changed, 54 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py
index b4239d7d34a90..c6246ffa4e3b3 100644
--- a/pandas/tests/groupby/test_counting.py
+++ b/pandas/tests/groupby/test_counting.py
@@ -3,7 +3,8 @@
 import numpy as np
 import pytest
 
-from pandas import DataFrame, MultiIndex, Period, Series, Timedelta, Timestamp
+from pandas import (DataFrame, Index, MultiIndex, Period, Series, Timedelta,
+    Timestamp)
 import pandas._testing as tm
 
 
@@ -220,3 +221,55 @@ def test_count_with_only_nans_in_first_group(self):
         mi = MultiIndex(levels=[[], ["a", "b"]], codes=[[], []], names=["A", "B"])
         expected = Series([], index=mi, dtype=np.int64, name="C")
         tm.assert_series_equal(result, expected, check_index_type=False)
+
+    #def test_count_groupby_index_with_nan_in_data(self):
+    #    # https://github.com/pandas-dev/pandas/issues/21824
+    #    df = DataFrame({"Person": ["John", "Myla", "John", "John", "Myla"],
+    #                       "Age": [24., np.nan, 21., 33, 26],
+    #                       "Single": [False, True, True, True, False]})
+
+    #    res = df.set_index(["Person", "Single"]).count(level="Person")
+    #    expected = DataFrame(index=Index(["John", "Myla"], name="Person"),
+    #                         data={"Age": [2, 1]})
+
+    def test_count_groupby_index_with_nan_in_index(self):
+        # https://github.com/pandas-dev/pandas/issues/21824
+        df = DataFrame({"Person": ["John", "Myla", None, "John", "Myla"],
+                           "Age": [24., 5, 21., 33, 26],
+                           "Single": [False, True, True, True, False]})
+
+        res = df.set_index(["Person", "Single"]).count(level="Person")
+        expected = DataFrame(index=Index(["John", "Myla"], name="Person"),
+                             data={"Age": [2, 2]})
+
+    #def test_count_groupby_index_with_nan_in_data_and_in_index(self):
+    #    # https://github.com/pandas-dev/pandas/issues/21824
+    #    df = DataFrame({"Person": ["John", "Myla", None, "John", "Myla"],
+    #                       "Age": [24., np.nan, 21., 33, 26],
+    #                       "Single": [False, True, True, True, False]})
+
+    #    res = df.set_index(["Person", "Single"]).count(level="Person")
+    #    expected = DataFrame(index=Index(["John", "Myla"], name="Person"),
+    #                         data={"Age": [2, 1]})
+
+
+    def test_count_groupby_column_with_nan_in_groupby_column(self):
+        df = DataFrame({"A": [1, 1, 1, 1, 1], "B": [5, 4, np.NaN, 3, 0]})
+        res = df.groupby(["B"]).count()
+        expected = DataFrame(index=Index([0.0, 3.0, 4.0, 5.0], name="B"),
+                             data={"A": [1, 1, 1, 1]})
+        tm.assert_frame_equal(expected, res)
+
+    #def test_count_groupby_column_with_nan_in_data(self):
+    #    df = DataFrame({"A": [1, np.NaN, 1, 1, 1], "B": [5, 4., 2, 3, 0]})
+    #    res = df.groupby(["B"]).count()
+    #    expected = DataFrame(index=Index([0.0, 2.0, 3.0, 4.0, 5.0], name="B"),
+    #                         data={"A": [1, 1, 1, 0, 1]})
+    #    tm.assert_frame_equal(expected, res)
+
+    #def test_count_groupby_column_with_nan_in_data_and_in_groupby_column(self):
+    #    df = DataFrame({"A": [1, np.NaN, 1, 1, 1], "B": [5, 4, np.NaN, 3, 0]})
+    #    res = df.groupby(["B"]).count()
+    #    expected = DataFrame(index=Index([0.0, 3.0, 4.0, 5.0], name="B"),
+    #                         data={"A": [1, 1, 0, 1]})
+    #    tm.assert_frame_equal(expected, res)

From d6df0f0c845da2319851d65f7d5978ae8a3a5649 Mon Sep 17 00:00:00 2001
From: tv3141 <tv3141@users.noreply.github.com>
Date: Tue, 17 Mar 2020 20:26:56 +0000
Subject: [PATCH 03/12] Count only masked (non-na) values.

---
 pandas/_libs/lib.pyx | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 2a123f4c2028f..f18fab9207fda 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -793,14 +793,16 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
         with nogil:
             for i in range(n):
                 for j in range(k):
-                    counts[labels[i], j] += mask[i, j]
+                    if mask[i, j]:
+                        counts[labels[i], j] += 1
 
     else:  # axis == 1
         counts = np.zeros((n, max_bin), dtype='i8')
         with nogil:
             for i in range(n):
                 for j in range(k):
-                    counts[i, labels[j]] += mask[i, j]
+                    if mask[i, j]:
+                        counts[i, labels[j]] += 1
 
     return counts
 

From 049b897f603a5f96cfe55ba071228a3eda7470f5 Mon Sep 17 00:00:00 2001
From: tv3141 <tv3141@users.noreply.github.com>
Date: Sat, 21 Mar 2020 18:15:12 +0000
Subject: [PATCH 04/12] Mask NaNs in index and dataframe values.

---
 pandas/core/frame.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index baa6fb07ff233..745646b2d7a9c 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7820,13 +7820,21 @@ def _count_level(self, level, axis=0, numeric_only=False):
                 f"Can only count levels on hierarchical {self._get_axis_name(axis)}."
             )
 
+        # Mask NaNs: Mask rows where the index level is NaN and all values in
+        # the DataFrame that are NaN
         if frame._is_mixed_type:
             # Since we have mixed types, calling notna(frame.values) might
             # upcast everything to object
-            mask = notna(frame).values
+            mask = (
+                notna(frame.index.get_level_values(level=level)).reshape(-1, 1) &
+                notna(frame).values
+            )
         else:
             # But use the speedup when we have homogeneous dtypes
-            mask = notna(frame.values)
+            mask = (
+                notna(frame.index.get_level_values(level=level)).reshape(-1, 1) &
+                notna(frame.values)
+            )
 
         if axis == 1:
             # We're transposing the mask rather than frame to avoid potential

From 1e755b3449733c955177b2af880b16bf03c319b8 Mon Sep 17 00:00:00 2001
From: tv3141 <tv3141@users.noreply.github.com>
Date: Sat, 21 Mar 2020 18:22:33 +0000
Subject: [PATCH 05/12] Clean up tests.

---
 pandas/tests/groupby/test_counting.py | 42 +++------------------------
 1 file changed, 4 insertions(+), 38 deletions(-)

diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py
index c6246ffa4e3b3..869276ca6785e 100644
--- a/pandas/tests/groupby/test_counting.py
+++ b/pandas/tests/groupby/test_counting.py
@@ -4,7 +4,7 @@
 import pytest
 
 from pandas import (DataFrame, Index, MultiIndex, Period, Series, Timedelta,
-    Timestamp)
+                    Timestamp)
 import pandas._testing as tm
 
 
@@ -222,54 +222,20 @@ def test_count_with_only_nans_in_first_group(self):
         expected = Series([], index=mi, dtype=np.int64, name="C")
         tm.assert_series_equal(result, expected, check_index_type=False)
 
-    #def test_count_groupby_index_with_nan_in_data(self):
-    #    # https://github.com/pandas-dev/pandas/issues/21824
-    #    df = DataFrame({"Person": ["John", "Myla", "John", "John", "Myla"],
-    #                       "Age": [24., np.nan, 21., 33, 26],
-    #                       "Single": [False, True, True, True, False]})
-
-    #    res = df.set_index(["Person", "Single"]).count(level="Person")
-    #    expected = DataFrame(index=Index(["John", "Myla"], name="Person"),
-    #                         data={"Age": [2, 1]})
-
     def test_count_groupby_index_with_nan_in_index(self):
         # https://github.com/pandas-dev/pandas/issues/21824
         df = DataFrame({"Person": ["John", "Myla", None, "John", "Myla"],
-                           "Age": [24., 5, 21., 33, 26],
-                           "Single": [False, True, True, True, False]})
+                        "Age": [24., 5, 21., 33, 26],
+                        "Single": [False, True, True, True, False]})
 
         res = df.set_index(["Person", "Single"]).count(level="Person")
         expected = DataFrame(index=Index(["John", "Myla"], name="Person"),
                              data={"Age": [2, 2]})
 
-    #def test_count_groupby_index_with_nan_in_data_and_in_index(self):
-    #    # https://github.com/pandas-dev/pandas/issues/21824
-    #    df = DataFrame({"Person": ["John", "Myla", None, "John", "Myla"],
-    #                       "Age": [24., np.nan, 21., 33, 26],
-    #                       "Single": [False, True, True, True, False]})
-
-    #    res = df.set_index(["Person", "Single"]).count(level="Person")
-    #    expected = DataFrame(index=Index(["John", "Myla"], name="Person"),
-    #                         data={"Age": [2, 1]})
-
-
     def test_count_groupby_column_with_nan_in_groupby_column(self):
+        # https://github.com/pandas-dev/pandas/issues/32841
         df = DataFrame({"A": [1, 1, 1, 1, 1], "B": [5, 4, np.NaN, 3, 0]})
         res = df.groupby(["B"]).count()
         expected = DataFrame(index=Index([0.0, 3.0, 4.0, 5.0], name="B"),
                              data={"A": [1, 1, 1, 1]})
         tm.assert_frame_equal(expected, res)
-
-    #def test_count_groupby_column_with_nan_in_data(self):
-    #    df = DataFrame({"A": [1, np.NaN, 1, 1, 1], "B": [5, 4., 2, 3, 0]})
-    #    res = df.groupby(["B"]).count()
-    #    expected = DataFrame(index=Index([0.0, 2.0, 3.0, 4.0, 5.0], name="B"),
-    #                         data={"A": [1, 1, 1, 0, 1]})
-    #    tm.assert_frame_equal(expected, res)
-
-    #def test_count_groupby_column_with_nan_in_data_and_in_groupby_column(self):
-    #    df = DataFrame({"A": [1, np.NaN, 1, 1, 1], "B": [5, 4, np.NaN, 3, 0]})
-    #    res = df.groupby(["B"]).count()
-    #    expected = DataFrame(index=Index([0.0, 3.0, 4.0, 5.0], name="B"),
-    #                         data={"A": [1, 1, 0, 1]})
-    #    tm.assert_frame_equal(expected, res)

From de752483534146e3c54152974858d0a5da73e321 Mon Sep 17 00:00:00 2001
From: tv3141 <tv3141@users.noreply.github.com>
Date: Sat, 21 Mar 2020 18:33:40 +0000
Subject: [PATCH 06/12] Re-disable bounds checking.

---
 pandas/_libs/lib.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index f18fab9207fda..3dfc373372859 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -775,7 +775,7 @@ def get_level_sorter(const int64_t[:] label, const int64_t[:] starts):
     return out
 
 
-@cython.boundscheck(True)
+@cython.boundscheck(False)
 @cython.wraparound(False)
 def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
                    const int64_t[:] labels,

From 3bea5af618e505c7721126f538f99c8643688b2e Mon Sep 17 00:00:00 2001
From: tv3141 <tv3141@users.noreply.github.com>
Date: Sat, 21 Mar 2020 19:21:19 +0000
Subject: [PATCH 07/12] Move test into right module.

---
 pandas/tests/groupby/test_counting.py | 10 ----------
 pandas/tests/test_multilevel.py       | 11 +++++++++++
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py
index 869276ca6785e..4e00b3f4a44d0 100644
--- a/pandas/tests/groupby/test_counting.py
+++ b/pandas/tests/groupby/test_counting.py
@@ -222,16 +222,6 @@ def test_count_with_only_nans_in_first_group(self):
         expected = Series([], index=mi, dtype=np.int64, name="C")
         tm.assert_series_equal(result, expected, check_index_type=False)
 
-    def test_count_groupby_index_with_nan_in_index(self):
-        # https://github.com/pandas-dev/pandas/issues/21824
-        df = DataFrame({"Person": ["John", "Myla", None, "John", "Myla"],
-                        "Age": [24., 5, 21., 33, 26],
-                        "Single": [False, True, True, True, False]})
-
-        res = df.set_index(["Person", "Single"]).count(level="Person")
-        expected = DataFrame(index=Index(["John", "Myla"], name="Person"),
-                             data={"Age": [2, 2]})
-
     def test_count_groupby_column_with_nan_in_groupby_column(self):
         # https://github.com/pandas-dev/pandas/issues/32841
         df = DataFrame({"A": [1, 1, 1, 1, 1], "B": [5, 4, np.NaN, 3, 0]})
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 84279d874bae1..80cc57604dc80 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -248,6 +248,17 @@ def _check_counts(frame, axis=0):
         result = self.frame.count(level=0, numeric_only=True)
         tm.assert_index_equal(result.columns, Index(list("ABC"), name="exp"))
 
+    def test_count_index_with_nan(self):
+        # https://github.com/pandas-dev/pandas/issues/21824
+        df = DataFrame({"Person": ["John", "Myla", None, "John", "Myla"],
+                        "Age": [24., 5, 21., 33, 26],
+                        "Single": [False, True, True, True, False]})
+
+        res = df.set_index(["Person", "Single"]).count(level="Person")
+        expected = DataFrame(index=Index(["John", "Myla"], name="Person"),
+                             data={"Age": [2, 2]})
+
+
     def test_count_level_series(self):
         index = MultiIndex(
             levels=[["foo", "bar", "baz"], ["one", "two", "three", "four"]],

From 432cd6895d4f1710070379c3deb6e92795c33e14 Mon Sep 17 00:00:00 2001
From: tv3141 <tv3141@users.noreply.github.com>
Date: Sun, 22 Mar 2020 20:10:11 +0000
Subject: [PATCH 08/12] Fix masking logic to avoid SegFaults with
 DataFrame.count().

---
 pandas/core/frame.py            | 36 +++++++++++----------------------
 pandas/tests/test_multilevel.py | 27 ++++++++++++++++++++-----
 2 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 745646b2d7a9c..cb1128c845a2f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7820,26 +7820,15 @@ def _count_level(self, level, axis=0, numeric_only=False):
                 f"Can only count levels on hierarchical {self._get_axis_name(axis)}."
             )
 
-        # Mask NaNs: Mask rows where the index level is NaN and all values in
-        # the DataFrame that are NaN
-        if frame._is_mixed_type:
-            # Since we have mixed types, calling notna(frame.values) might
-            # upcast everything to object
-            mask = (
-                notna(frame.index.get_level_values(level=level)).reshape(-1, 1) &
-                notna(frame).values
-            )
-        else:
-            # But use the speedup when we have homogeneous dtypes
-            mask = (
-                notna(frame.index.get_level_values(level=level)).reshape(-1, 1) &
-                notna(frame.values)
-            )
+        # Mask NaNs: Mask rows or columns where the index level is NaN, and all
+        # values in the DataFrame that are NaN
+        values_mask = notna(frame.values)
 
+        index_mask = notna(count_axis.get_level_values(level=level))
         if axis == 1:
-            # We're transposing the mask rather than frame to avoid potential
-            # upcasts to object, which induces a ~20x slowdown
-            mask = mask.T
+            mask = index_mask & values_mask
+        else:
+            mask = index_mask.reshape(-1, 1) & values_mask
 
         if isinstance(level, str):
             level = count_axis._get_level_number(level)
@@ -7847,15 +7836,14 @@ def _count_level(self, level, axis=0, numeric_only=False):
         level_name = count_axis._names[level]
         level_index = count_axis.levels[level]._shallow_copy(name=level_name)
         level_codes = ensure_int64(count_axis.codes[level])
-        counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=0)
-
-        result = DataFrame(counts, index=level_index, columns=agg_axis)
+        counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=axis)
 
         if axis == 1:
-            # Undo our earlier transpose
-            return result.T
+            result = DataFrame(counts, index=agg_axis, columns=level_index)
         else:
-            return result
+            result = DataFrame(counts, index=level_index, columns=agg_axis)
+
+        return result
 
     def _reduce(
         self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 80cc57604dc80..0e12f7207295e 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -250,14 +250,31 @@ def _check_counts(frame, axis=0):
 
     def test_count_index_with_nan(self):
         # https://github.com/pandas-dev/pandas/issues/21824
-        df = DataFrame({"Person": ["John", "Myla", None, "John", "Myla"],
-                        "Age": [24., 5, 21., 33, 26],
-                        "Single": [False, True, True, True, False]})
+        df = DataFrame(
+            {
+                "Person": ["John", "Myla", None, "John", "Myla"],
+                "Age": [24.0, 5, 21.0, 33, 26],
+                "Single": [False, True, True, True, False],
+            }
+        )
 
+        # count on row labels
         res = df.set_index(["Person", "Single"]).count(level="Person")
-        expected = DataFrame(index=Index(["John", "Myla"], name="Person"),
-                             data={"Age": [2, 2]})
+        expected = DataFrame(
+            index=Index(["John", "Myla"], name="Person"),
+            columns=Index(["Age"]),
+            data=np.array([2, 2]),
+        )
+        tm.assert_frame_equal(res, expected)
 
+        # count on column labels
+        res = df.set_index(["Person", "Single"]).T.count(level="Person", axis=1)
+        expected = DataFrame(
+            columns=Index(["John", "Myla"], name="Person"),
+            index=Index(["Age"]),
+            data=np.array([[2, 2]]),
+        )
+        tm.assert_frame_equal(res, expected)
 
     def test_count_level_series(self):
         index = MultiIndex(

From 7e379b1a20abf359c2a8daa1c50ce6cc57dc2d90 Mon Sep 17 00:00:00 2001
From: tv3141 <tv3141@users.noreply.github.com>
Date: Sun, 22 Mar 2020 20:10:27 +0000
Subject: [PATCH 09/12] black

---
 pandas/tests/groupby/test_counting.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py
index 4e00b3f4a44d0..56a18757da6e7 100644
--- a/pandas/tests/groupby/test_counting.py
+++ b/pandas/tests/groupby/test_counting.py
@@ -3,8 +3,7 @@
 import numpy as np
 import pytest
 
-from pandas import (DataFrame, Index, MultiIndex, Period, Series, Timedelta,
-                    Timestamp)
+from pandas import DataFrame, Index, MultiIndex, Period, Series, Timedelta, Timestamp
 import pandas._testing as tm
 
 
@@ -226,6 +225,7 @@ def test_count_groupby_column_with_nan_in_groupby_column(self):
         # https://github.com/pandas-dev/pandas/issues/32841
         df = DataFrame({"A": [1, 1, 1, 1, 1], "B": [5, 4, np.NaN, 3, 0]})
         res = df.groupby(["B"]).count()
-        expected = DataFrame(index=Index([0.0, 3.0, 4.0, 5.0], name="B"),
-                             data={"A": [1, 1, 1, 1]})
+        expected = DataFrame(
+            index=Index([0.0, 3.0, 4.0, 5.0], name="B"), data={"A": [1, 1, 1, 1]}
+        )
         tm.assert_frame_equal(expected, res)

From a660d0f3b94c65e498f65dc8ef5b1a25a1815ede Mon Sep 17 00:00:00 2001
From: tv3141 <tv3141@users.noreply.github.com>
Date: Mon, 23 Mar 2020 22:33:15 +0000
Subject: [PATCH 10/12] Add info to whatsnew.

---
 doc/source/whatsnew/v1.1.0.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 0d3a9a8f969a4..1565f4c16ee80 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -271,7 +271,7 @@ Numeric
 - Bug in :meth:`to_numeric` with string argument ``"uint64"`` and ``errors="coerce"`` silently fails (:issue:`32394`)
 - Bug in :meth:`to_numeric` with ``downcast="unsigned"`` fails for empty data (:issue:`32493`)
 - Bug in :meth:`DataFrame.mean` with ``numeric_only=False`` and either ``datetime64`` dtype or ``PeriodDtype`` column incorrectly raising ``TypeError`` (:issue:`32426`)
--
+- Bug in :meth:`DataFrame.count` with ``level="foo"`` and index level ``"foo"`` containing NaNs causes segmentation fault (:issue:`21824`)
 
 Conversion
 ^^^^^^^^^^
@@ -361,6 +361,7 @@ Groupby/resample/rolling
 
 - Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates and the applied ``func`` does not mutate passed in objects (:issue:`30667`)
 - Bug in :meth:`DataFrameGroupby.transform` produces incorrect result with transformation functions (:issue:`30918`)
+- Bug in :meth:`GroupBy.count` causes segmentation fault when grouped-by column contains NaNs (:issue:`32841`)
 
 Reshaping
 ^^^^^^^^^

From 52a6f99b4230c958e8f8d6e813cd89661c05d81c Mon Sep 17 00:00:00 2001
From: tv3141 <tv3141@users.noreply.github.com>
Date: Tue, 24 Mar 2020 20:16:17 +0000
Subject: [PATCH 11/12] Fix issue with default int32 on Windows.

---
 pandas/tests/test_multilevel.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 0e12f7207295e..f025abd5628cf 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -263,7 +263,7 @@ def test_count_index_with_nan(self):
         expected = DataFrame(
             index=Index(["John", "Myla"], name="Person"),
             columns=Index(["Age"]),
-            data=np.array([2, 2]),
+            data=[2, 2],
         )
         tm.assert_frame_equal(res, expected)
 
@@ -272,7 +272,7 @@ def test_count_index_with_nan(self):
         expected = DataFrame(
             columns=Index(["John", "Myla"], name="Person"),
             index=Index(["Age"]),
-            data=np.array([[2, 2]]),
+            data=[[2, 2]],
         )
         tm.assert_frame_equal(res, expected)
 

From 60289c28bf9aa2e0203ef3edc5d19e85a584e953 Mon Sep 17 00:00:00 2001
From: tv3141 <tv3141@users.noreply.github.com>
Date: Tue, 31 Mar 2020 22:16:09 +0100
Subject: [PATCH 12/12] Re-add notna optimisation.

---
 pandas/core/frame.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5ea5ec6832821..6ddb00db350af 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7895,7 +7895,13 @@ def _count_level(self, level, axis=0, numeric_only=False):
 
         # Mask NaNs: Mask rows or columns where the index level is NaN, and all
         # values in the DataFrame that are NaN
-        values_mask = notna(frame.values)
+        if frame._is_mixed_type:
+            # Since we have mixed types, calling notna(frame.values) might
+            # upcast everything to object
+            values_mask = notna(frame).values
+        else:
+            # But use the speedup when we have homogeneous dtypes
+            values_mask = notna(frame.values)
 
         index_mask = notna(count_axis.get_level_values(level=level))
         if axis == 1: