From 6c21eabdbdbbb6ae6e10f8dba34822f8230a2e5b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 Jul 2019 11:16:56 -0500 Subject: [PATCH 1/2] Object dtype for empty describe Closes https://github.com/pandas-dev/pandas/issues/27183 --- pandas/core/generic.py | 6 ++++-- pandas/tests/frame/test_analytics.py | 14 +++++++++++++- pandas/tests/series/test_analytics.py | 11 +++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 822428c6787be..c350e7e2546c3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9644,6 +9644,7 @@ def describe_categorical_1d(data): objcounts = data.value_counts() count_unique = len(objcounts[objcounts != 0]) result = [data.count(), count_unique] + dtype = None if result[1] > 0: top, freq = objcounts.index[0], objcounts.iloc[0] @@ -9668,9 +9669,10 @@ def describe_categorical_1d(data): # to maintain output shape consistency else: names += ['top', 'freq'] - result += [None, None] + result += [np.nan, np.nan] + dtype = 'object' - return pd.Series(result, index=names, name=data.name) + return pd.Series(result, index=names, name=data.name, dtype=dtype) def describe_1d(data): if is_bool_dtype(data): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 01a398584b5e1..35f69b413ec7b 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -525,6 +525,17 @@ def test_bool_describe_in_mixed_frame(self): index=['count', 'unique', 'top', 'freq']) tm.assert_frame_equal(result, expected) + def test_describe_empty_object(self): + # https://github.com/pandas-dev/pandas/issues/27183 + df = pd.DataFrame({"A": [None, None]}, dtype=object) + result = df.describe() + expected = pd.DataFrame({"A": [0, 0, np.nan, np.nan]}, dtype=object, + index=['count', 'unique', 'top', 'freq']) + tm.assert_frame_equal(result, expected) + + result = df.iloc[:0].describe() + tm.assert_frame_equal(result, 
expected) + def test_describe_bool_frame(self): # GH 13891 df = pd.DataFrame({ @@ -595,7 +606,8 @@ def test_describe_empty_categorical_column(self): df = pd.DataFrame({"empty_col": Categorical([])}) result = df.describe() expected = DataFrame({'empty_col': [0, 0, None, None]}, - index=['count', 'unique', 'top', 'freq']) + index=['count', 'unique', 'top', 'freq'], + dtype='object') tm.assert_frame_equal(result, expected) def test_describe_categorical_columns(self): diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index df69bb35115cf..a4f65c0dc398a 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -42,6 +42,17 @@ def test_describe(self): index=['count', 'unique', 'top', 'freq']) tm.assert_series_equal(result, expected) + def test_describe_empty_object(self): + # https://github.com/pandas-dev/pandas/issues/27183 + s = pd.Series([None, None], dtype=object) + result = s.describe() + expected = pd.Series([0, 0, np.nan, np.nan], dtype=object, + index=['count', 'unique', 'top', 'freq']) + tm.assert_series_equal(result, expected) + + result = s[:0].describe() + tm.assert_series_equal(result, expected) + def test_describe_with_tz(self, tz_naive_fixture): # GH 21332 tz = tz_naive_fixture From 79622d3c0055f46ec560cfc0209d48edfb988ad1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 Jul 2019 15:59:23 -0500 Subject: [PATCH 2/2] nan asserts --- pandas/tests/frame/test_analytics.py | 7 +++++-- pandas/tests/series/test_analytics.py | 3 +++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 35f69b413ec7b..9921d91d6de8c 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -601,14 +601,17 @@ def test_describe_categorical(self): def test_describe_empty_categorical_column(self): # GH 26397 - # Ensure the index of an an empty categoric DataFrame column + # 
Ensure the index of an empty categorical DataFrame column # also contains (count, unique, top, freq) df = pd.DataFrame({"empty_col": Categorical([])}) result = df.describe() - expected = DataFrame({'empty_col': [0, 0, None, None]}, + expected = DataFrame({'empty_col': [0, 0, np.nan, np.nan]}, index=['count', 'unique', 'top', 'freq'], dtype='object') tm.assert_frame_equal(result, expected) + # ensure NaN, not None + assert np.isnan(result.iloc[2, 0]) + assert np.isnan(result.iloc[3, 0]) def test_describe_categorical_columns(self): # GH 11558 diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index a4f65c0dc398a..e48fd9ce11a7d 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -52,6 +52,9 @@ def test_describe_empty_object(self): result = s[:0].describe() tm.assert_series_equal(result, expected) + # ensure NaN, not None + assert np.isnan(result.iloc[2]) + assert np.isnan(result.iloc[3]) def test_describe_with_tz(self, tz_naive_fixture): # GH 21332