Skip to content

Commit 212df86

Browse files
TomAugspurger authored and jreback committed
Object dtype for empty describe (#27184)
1 parent a0bfbf0 commit 212df86

File tree

3 files changed

+36
-5
lines changed

3 files changed

+36
-5
lines changed

pandas/core/generic.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9652,6 +9652,7 @@ def describe_categorical_1d(data):
96529652
objcounts = data.value_counts()
96539653
count_unique = len(objcounts[objcounts != 0])
96549654
result = [data.count(), count_unique]
9655+
dtype = None
96559656
if result[1] > 0:
96569657
top, freq = objcounts.index[0], objcounts.iloc[0]
96579658

@@ -9676,9 +9677,10 @@ def describe_categorical_1d(data):
96769677
# to maintain output shape consistency
96779678
else:
96789679
names += ['top', 'freq']
9679-
result += [None, None]
9680+
result += [np.nan, np.nan]
9681+
dtype = 'object'
96809682

9681-
return pd.Series(result, index=names, name=data.name)
9683+
return pd.Series(result, index=names, name=data.name, dtype=dtype)
96829684

96839685
def describe_1d(data):
96849686
if is_bool_dtype(data):

pandas/tests/frame/test_analytics.py

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -525,6 +525,17 @@ def test_bool_describe_in_mixed_frame(self):
525525
index=['count', 'unique', 'top', 'freq'])
526526
tm.assert_frame_equal(result, expected)
527527

528+
def test_describe_empty_object(self):
529+
# https://github.com/pandas-dev/pandas/issues/27183
530+
df = pd.DataFrame({"A": [None, None]}, dtype=object)
531+
result = df.describe()
532+
expected = pd.DataFrame({"A": [0, 0, np.nan, np.nan]}, dtype=object,
533+
index=['count', 'unique', 'top', 'freq'])
534+
tm.assert_frame_equal(result, expected)
535+
536+
result = df.iloc[:0].describe()
537+
tm.assert_frame_equal(result, expected)
538+
528539
def test_describe_bool_frame(self):
529540
# GH 13891
530541
df = pd.DataFrame({
@@ -590,13 +601,17 @@ def test_describe_categorical(self):
590601

591602
def test_describe_empty_categorical_column(self):
592603
# GH 26397
593-
# Ensure the index of an an empty categoric DataFrame column
604+
# Ensure the index of an an empty categorical DataFrame column
594605
# also contains (count, unique, top, freq)
595606
df = pd.DataFrame({"empty_col": Categorical([])})
596607
result = df.describe()
597-
expected = DataFrame({'empty_col': [0, 0, None, None]},
598-
index=['count', 'unique', 'top', 'freq'])
608+
expected = DataFrame({'empty_col': [0, 0, np.nan, np.nan]},
609+
index=['count', 'unique', 'top', 'freq'],
610+
dtype='object')
599611
tm.assert_frame_equal(result, expected)
612+
# ensure NaN, not None
613+
assert np.isnan(result.iloc[2, 0])
614+
assert np.isnan(result.iloc[3, 0])
600615

601616
def test_describe_categorical_columns(self):
602617
# GH 11558

pandas/tests/series/test_analytics.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,20 @@ def test_describe(self):
4242
index=['count', 'unique', 'top', 'freq'])
4343
tm.assert_series_equal(result, expected)
4444

45+
def test_describe_empty_object(self):
46+
# https://github.com/pandas-dev/pandas/issues/27183
47+
s = pd.Series([None, None], dtype=object)
48+
result = s.describe()
49+
expected = pd.Series([0, 0, np.nan, np.nan], dtype=object,
50+
index=['count', 'unique', 'top', 'freq'])
51+
tm.assert_series_equal(result, expected)
52+
53+
result = s[:0].describe()
54+
tm.assert_series_equal(result, expected)
55+
# ensure NaN, not None
56+
assert np.isnan(result.iloc[2])
57+
assert np.isnan(result.iloc[3])
58+
4559
def test_describe_with_tz(self, tz_naive_fixture):
4660
# GH 21332
4761
tz = tz_naive_fixture

0 commit comments

Comments
 (0)