diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7e4fa44ea8ded..3737914c8c595 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -447,7 +447,7 @@ Bug Fixes - Bug in ``Series`` constructor when both ``copy=True`` and ``dtype`` arguments are provided (:issue:`15125`) -- Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`) +- Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`) - Incorrect dtyped ``Series`` was returned by comparison methods (e.g., ``lt``, ``gt``, ...) against a constant for an empty ``DataFrame`` (:issue:`15077`) - Bug in ``Series.dt.round`` inconsistent behaviour on NAT's with different arguments (:issue:`14940`) @@ -465,3 +465,5 @@ Bug Fixes - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) + +- Bug in window function ``count`` not counting ``np.Inf`` (:issue:`12541`) \ No newline at end of file diff --git a/pandas/core/window.py b/pandas/core/window.py index b330a12110923..bda134dd8a2a4 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -762,17 +762,7 @@ def count(self): results = [] for b in blocks: - - if needs_i8_conversion(b.values): - result = b.notnull().astype(int) - else: - try: - result = np.isfinite(b).astype(float) - except TypeError: - result = np.isfinite(b.astype(float)).astype(float) - - result[pd.isnull(result)] = 0 - + result = b.notnull().astype(int) result = self._constructor(result, window=window, min_periods=0, center=self.center).sum() results.append(result) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 1afd5dad404c8..3f53b5eaf3753 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -207,6 +207,44 @@ def f(): 'A', 'ra', 'std'), ('B', 'rb', 'mean'), ('B', 'rb', 'std')])
tm.assert_frame_equal(result, expected, check_like=True) + def test_count_nonnumeric_types(self): + # GH12541 + cols = ['int', 'float', 'string', 'datetime', 'timedelta', 'periods', + 'fl_inf', 'fl_nan', 'str_nan', 'dt_nat', 'periods_nat'] + + df = DataFrame( + {'int': [1, 2, 3], + 'float': [4., 5., 6.], + 'string': list('abc'), + 'datetime': pd.date_range('20170101', periods=3), + 'timedelta': pd.timedelta_range('1 s', periods=3, freq='s'), + 'periods': [pd.Period('2012-01'), pd.Period('2012-02'), + pd.Period('2012-03')], + 'fl_inf': [1., 2., np.Inf], + 'fl_nan': [1., 2., np.NaN], + 'str_nan': ['aa', 'bb', np.NaN], + 'dt_nat': [pd.Timestamp('20170101'), pd.Timestamp('20170203'), + pd.Timestamp(None)], + 'periods_nat': [pd.Period('2012-01'), pd.Period('2012-02'), + pd.Period(None)]}, + columns=cols) + + expected = DataFrame( + {'int': [1., 2., 2.], + 'float': [1., 2., 2.], + 'string': [1., 2., 2.], + 'datetime': [1., 2., 2.], + 'timedelta': [1., 2., 2.], + 'periods': [1., 2., 2.], + 'fl_inf': [1., 2., 2.], + 'fl_nan': [1., 2., 1.], + 'str_nan': [1., 2., 1.], + 'dt_nat': [1., 2., 1.], + 'periods_nat': [1., 2., 1.]}, + columns=cols) + + self.assert_frame_equal(df.rolling(window=2).count(), expected) + def test_window_with_args(self): tm._skip_if_no_scipy()