From 74cba7d2f5392e9de39e0de1b479a9d21e8561d3 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Sun, 12 Jan 2020 00:26:53 +0800 Subject: [PATCH 01/12] BUG: Series rolling count ignores min_periods (GH26996) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/window/rolling.py | 4 +++- pandas/tests/window/test_expanding.py | 7 +++++++ pandas/tests/window/test_rolling.py | 7 +++++++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a820ef132957a..95d2adc491fb2 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1165,6 +1165,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) - Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`) - Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`) +- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument ``min_periods`` ignored (:issue:`26996`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index f612826132fd7..a89991dbf14af 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1185,6 +1185,8 @@ def count(self): window = self._get_window() window = min(window, len(obj)) if not self.center else window + min_periods = self.min_periods if self.min_periods is not None else 0 + min_periods = min(min_periods, len(obj)) if not self.center else min_periods results = [] for b in blocks: @@ -1192,7 +1194,7 @@ def count(self): result = self._constructor( result, window=window, - min_periods=0, + min_periods=min_periods, center=self.center, axis=self.axis, closed=self.closed, diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index fc4bd50f25c73..58ad20e473560 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -113,3 +113,10 @@ def test_expanding_axis(self, axis_frame): result = df.expanding(3, axis=axis_frame).sum() tm.assert_frame_equal(result, expected) + + +def test_expanding_count_with_min_periods(): + # GH 26996 + result = Series(range(5)).expanding(min_periods=3).count() + expected = Series([np.nan, np.nan, 3.0, 4.0, 5.0]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 04fab93b71c4a..26606973f5210 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -426,3 +426,10 @@ def test_min_periods1(): result = df["a"].rolling(3, center=True, min_periods=1).max() expected = pd.Series([1.0, 2.0, 2.0, 2.0, 1.0], name="a") tm.assert_series_equal(result, expected) + + +def test_rolling_count_with_min_periods(): + # GH 26996 + result = Series(range(5)).rolling(3, min_periods=3).count() + expected = Series([np.nan, np.nan, 3.0, 3.0, 3.0]) + tm.assert_series_equal(result, expected) From bd0fb503e5d311042ef32f0a424d2759137cab4b Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Fri, 17 Jan 2020 23:53:34 +0800 Subject: [PATCH 02/12] ENH: handles min_periods argument in rolling.count (GH26996) --- pandas/core/window/rolling.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index a89991dbf14af..7688206400caa 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1185,8 +1185,14 @@ def count(self): window = self._get_window() window = min(window, len(obj)) if not self.center else window - min_periods = self.min_periods if self.min_periods is not None else 0 - min_periods = min(min_periods, len(obj)) if not self.center else min_periods + + # We set the default value min_periods to be 0 because count method + # is meant to count NAs, we don't want it by default requires all + # values in the window to be valid to produce a valid count + min_periods = 0 if self.min_periods is None else self.min_periods + + # this is required as window is mutate above + min_periods = min(min_periods, window) results = [] for b in blocks: From 61226b59b8cb304186e4670e85adb26dccc3df78 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Sat, 18 Jan 2020 23:52:20 +0800 Subject: [PATCH 03/12] DOC: moved whatsnew to V1.1.0 # Conflicts: # doc/source/whatsnew/v1.1.0.rst --- doc/source/whatsnew/v1.0.0.rst | 1 - doc/source/whatsnew/v1.1.0.rst | 167 +++++++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 doc/source/whatsnew/v1.1.0.rst diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 95d2adc491fb2..a820ef132957a 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1165,7 +1165,6 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) - Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`) - Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`) -- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument ``min_periods`` ignored (:issue:`26996`) Reshaping ^^^^^^^^^ diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst new file mode 100644 index 0000000000000..e18a6331751e5 --- /dev/null +++ b/doc/source/whatsnew/v1.1.0.rst @@ -0,0 +1,167 @@ +.. _whatsnew_110: + +What's new in 1.1.0 (??) +------------------------ + +These are the changes in pandas 1.1.0. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_110.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- +- + + +.. --------------------------------------------------------------------------- + +.. _whatsnew_110.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- +- + +.. --------------------------------------------------------------------------- + + +.. _whatsnew_110.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- +- + +.. --------------------------------------------------------------------------- + +.. _whatsnew_110.bug_fixes: + +Bug fixes +~~~~~~~~~ + + +Categorical +^^^^^^^^^^^ + +- +- + +Datetimelike +^^^^^^^^^^^^ +- Bug in :class:`Timestamp` where constructing :class:`Timestamp` from ambiguous epoch time and calling constructor again changed :meth:`Timestamp.value` property (:issue:`24329`) +- +- + +Timedelta +^^^^^^^^^ + +- +- + +Timezones +^^^^^^^^^ + +- +- + + +Numeric +^^^^^^^ +- +- + +Conversion +^^^^^^^^^^ +- Bug in :class:`Series` construction from NumPy array with big-endian ``datetime64`` dtype (:issue:`29684`) +- +- + +Strings +^^^^^^^ + +- +- + + +Interval +^^^^^^^^ + +- +- + +Indexing +^^^^^^^^ + +- +- + +Missing +^^^^^^^ + +- +- + +MultiIndex +^^^^^^^^^^ + +- +- + +I/O +^^^ + +- +- + +Plotting +^^^^^^^^ + +- +- + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument ``min_periods`` ignored (:issue:`26996`) + +Reshaping +^^^^^^^^^ + +- +- Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns is set (:issue:`17038`) + +Sparse +^^^^^^ + +- +- + +ExtensionArray +^^^^^^^^^^^^^^ + +- +- + + +Other +^^^^^ +- +- + +.. --------------------------------------------------------------------------- + +.. _whatsnew_110.contributors: + +Contributors +~~~~~~~~~~~~ From 4a9306917039435685fb793fe17654c1780532c1 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Tue, 21 Jan 2020 10:46:12 +0800 Subject: [PATCH 04/12] TST: added more tests (GH26996) --- doc/source/whatsnew/v1.0.0.rst | 1 + doc/source/whatsnew/v1.1.0.rst | 3 ++- pandas/tests/window/test_rolling.py | 36 +++++++++++++++++++++++++---- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a820ef132957a..95d2adc491fb2 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1165,6 +1165,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) - Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`) - Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`) +- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument ``min_periods`` ignored (:issue:`26996`) Reshaping ^^^^^^^^^ diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e18a6331751e5..0a5ac87a0be75 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -133,7 +133,8 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument ``min_periods`` ignored (:issue:`26996`) +- +- Reshaping ^^^^^^^^^ diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 26606973f5210..0ba8bbebbfdd9 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -428,8 +428,36 @@ def test_min_periods1(): tm.assert_series_equal(result, expected) -def test_rolling_count_with_min_periods(): +@pytest.mark.parametrize("test_series", [True, False]) +def test_rolling_count_with_min_periods(test_series): # GH 26996 - result = Series(range(5)).rolling(3, min_periods=3).count() - expected = Series([np.nan, np.nan, 3.0, 3.0, 3.0]) - tm.assert_series_equal(result, expected) + if test_series: + result = Series(range(5)).rolling(3, min_periods=3).count() + expected = Series([np.nan, np.nan, 3.0, 3.0, 3.0]) + tm.assert_series_equal(result, expected) + else: + result = DataFrame(range(5)).rolling(3, min_periods=3).count() + expected = DataFrame([np.nan, np.nan, 3.0, 3.0, 3.0]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("test_series", [True, False]) +def test_rolling_count_default_min_periods_with_null_values(test_series): + # GH 26996 + # We need rolling count to have default min_periods=0, + # as the method is meant to count how many non-null values, + # we want to by default produce a valid count even if + # there are very few valid entries in the window + values = [1, 2, 3, np.nan, 4, 5, 6] + expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0] + + if test_series: + ser = Series(values) + result = ser.rolling(3).count() + expected = Series(expected_counts) + tm.assert_series_equal(result, expected) + else: + df = DataFrame(values) + result = df.rolling(3).count() + expected = DataFrame(expected_counts) + tm.assert_frame_equal(result, expected) From 14a4fcb6f0f7d80a1143985ade2a03f49b3f7f0c Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Tue, 21 Jan 2020 23:11:18 +0800 Subject: [PATCH 05/12] BUG: updated rolling and expanding count for consistency (GH26996) Updated the behavior of rolling and expanding count so that it becomes consistent with all other rolling and expanding functions. Also updated many test cases to reflect this change of behavior. --- pandas/core/window/rolling.py | 17 +++--- .../window/moments/test_moments_expanding.py | 6 +-- .../window/moments/test_moments_rolling.py | 52 +++++++++++++------ pandas/tests/window/test_api.py | 4 +- pandas/tests/window/test_dtypes.py | 8 +-- pandas/tests/window/test_rolling.py | 4 +- 6 files changed, 55 insertions(+), 36 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 7688206400caa..921e59b37d5c4 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1186,13 +1186,10 @@ def count(self): window = self._get_window() window = min(window, len(obj)) if not self.center else window - # We set the default value min_periods to be 0 because count method - # is meant to count NAs, we don't want it by default requires all - # values in the window to be valid to produce a valid count - min_periods = 0 if self.min_periods is None else self.min_periods - - # this is required as window is mutate above - min_periods = min(min_periods, window) + min_periods = self.min_periods + if min_periods is not None and not self.center: + # this is required as window is mutated above + min_periods = min(min_periods, window) results = [] for b in blocks: @@ -1665,7 +1662,11 @@ def _get_cov(X, Y): mean = lambda x: x.rolling( window, self.min_periods, center=self.center ).mean(**kwargs) - count = (X + Y).rolling(window=window, center=self.center).count(**kwargs) + count = ( + (X + Y) + .rolling(window=window, min_periods=0, center=self.center) + .count(**kwargs) + ) bias_adj = count / (count - ddof) return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj diff --git a/pandas/tests/window/moments/test_moments_expanding.py b/pandas/tests/window/moments/test_moments_expanding.py index 4596552d8f255..983aa30560688 100644 --- a/pandas/tests/window/moments/test_moments_expanding.py +++ b/pandas/tests/window/moments/test_moments_expanding.py @@ -38,9 +38,9 @@ def test_expanding_corr(self): tm.assert_almost_equal(rolling_result, result) def test_expanding_count(self): - result = self.series.expanding().count() + result = self.series.expanding(min_periods=0).count() tm.assert_almost_equal( - result, self.series.rolling(window=len(self.series)).count() + result, self.series.rolling(window=len(self.series), min_periods=0).count() ) def test_expanding_quantile(self): @@ -358,7 +358,7 @@ def test_expanding_consistency(self, min_periods): ) self._test_moments_consistency( min_periods=min_periods, - count=lambda x: x.expanding().count(), + count=lambda x: x.expanding(min_periods=min_periods).count(), mean=lambda x: x.expanding(min_periods=min_periods).mean(), corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y), var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(), diff --git a/pandas/tests/window/moments/test_moments_rolling.py b/pandas/tests/window/moments/test_moments_rolling.py index 9acb4ffcb40b8..2de3c25b6d78e 100644 --- a/pandas/tests/window/moments/test_moments_rolling.py +++ b/pandas/tests/window/moments/test_moments_rolling.py @@ -777,8 +777,8 @@ def get_result(obj, window, min_periods=None, center=False): series_result = get_result(series, window=win, min_periods=minp) frame_result = get_result(frame, window=win, min_periods=minp) else: - series_result = get_result(series, window=win) - frame_result = get_result(frame, window=win) + series_result = get_result(series, window=win, min_periods=0) + frame_result = get_result(frame, window=win, min_periods=0) last_date = series_result.index[-1] prev_date = last_date - 24 * offsets.BDay() @@ -851,10 +851,11 @@ def get_result(obj, window, min_periods=None, center=False): pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=15 )[9:].reset_index(drop=True) else: - result = get_result(obj, 20, center=True) - expected = get_result(pd.concat([obj, Series([np.NaN] * 9)]), 20)[ - 9: - ].reset_index(drop=True) + result = get_result(obj, 20, min_periods=0, center=True) + print(result) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=0 + )[9:].reset_index(drop=True) tm.assert_series_equal(result, expected) @@ -893,21 +894,27 @@ def get_result(obj, window, min_periods=None, center=False): else: series_xp = ( get_result( - self.series.reindex(list(self.series.index) + s), window=25 + self.series.reindex(list(self.series.index) + s), + window=25, + min_periods=0, ) .shift(-12) .reindex(self.series.index) ) frame_xp = ( get_result( - self.frame.reindex(list(self.frame.index) + s), window=25 + self.frame.reindex(list(self.frame.index) + s), + window=25, + min_periods=0, ) .shift(-12) .reindex(self.frame.index) ) - series_rs = get_result(self.series, window=25, center=True) - frame_rs = get_result(self.frame, window=25, center=True) + series_rs = get_result( + self.series, window=25, min_periods=0, center=True + ) + frame_rs = get_result(self.frame, window=25, min_periods=0, center=True) if fill_value is not None: series_xp = series_xp.fillna(fill_value) @@ -964,7 +971,11 @@ def test_rolling_consistency(self, window, min_periods, center): self._test_moments_consistency_is_constant( min_periods=min_periods, - count=lambda x: (x.rolling(window=window, center=center).count()), + count=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).count() + ), mean=lambda x: ( x.rolling( window=window, min_periods=min_periods, center=center @@ -989,19 +1000,26 @@ def test_rolling_consistency(self, window, min_periods, center): ).var(ddof=0) ), var_debiasing_factors=lambda x: ( - x.rolling(window=window, center=center) + x.rolling(window=window, min_periods=min_periods, center=center) .count() .divide( - (x.rolling(window=window, center=center).count() - 1.0).replace( - 0.0, np.nan - ) + ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).count() + - 1.0 + ).replace(0.0, np.nan) ) ), ) self._test_moments_consistency( min_periods=min_periods, - count=lambda x: (x.rolling(window=window, center=center).count()), + count=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).count() + ), mean=lambda x: ( x.rolling( window=window, min_periods=min_periods, center=center @@ -1071,7 +1089,7 @@ def test_rolling_consistency(self, window, min_periods, center): if name == "count": rolling_f_result = rolling_f() rolling_apply_f_result = x.rolling( - window=window, min_periods=0, center=center + window=window, min_periods=min_periods, center=center ).apply(func=f, raw=True) else: if name in ["cov", "corr"]: diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index 5e70e13209de5..680237db0535b 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -237,10 +237,10 @@ def test_count_nonnumeric_types(self): columns=cols, ) - result = df.rolling(window=2).count() + result = df.rolling(window=2, min_periods=0).count() tm.assert_frame_equal(result, expected) - result = df.rolling(1).count() + result = df.rolling(1, min_periods=0).count() expected = df.notna().astype(float) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_dtypes.py b/pandas/tests/window/test_dtypes.py index b1c9b66ab09d3..35f93b1262f59 100644 --- a/pandas/tests/window/test_dtypes.py +++ b/pandas/tests/window/test_dtypes.py @@ -34,7 +34,7 @@ class Dtype: def get_expects(self): expects = { "sr1": { - "count": Series([1, 2, 2, 2, 2], dtype="float64"), + "count": Series([np.nan, 2, 2, 2, 2], dtype="float64"), "max": Series([np.nan, 1, 2, 3, 4], dtype="float64"), "min": Series([np.nan, 0, 1, 2, 3], dtype="float64"), "sum": Series([np.nan, 1, 3, 5, 7], dtype="float64"), @@ -44,7 +44,7 @@ def get_expects(self): "median": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"), }, "sr2": { - "count": Series([1, 2, 2, 2, 2], dtype="float64"), + "count": Series([np.nan, 2, 2, 2, 2], dtype="float64"), "max": Series([np.nan, 10, 8, 6, 4], dtype="float64"), "min": Series([np.nan, 8, 6, 4, 2], dtype="float64"), "sum": Series([np.nan, 18, 14, 10, 6], dtype="float64"), @@ -54,7 +54,7 @@ def get_expects(self): "median": Series([np.nan, 9, 7, 5, 3], dtype="float64"), }, "sr3": { - "count": Series([1, 2, 2, 1, 1], dtype="float64"), + "count": Series([np.nan, 2, 2, 1, 1], dtype="float64"), "max": Series([np.nan, 1, 2, np.nan, np.nan], dtype="float64"), "min": Series([np.nan, 0, 1, np.nan, np.nan], dtype="float64"), "sum": Series([np.nan, 1, 3, np.nan, np.nan], dtype="float64"), @@ -67,7 +67,7 @@ def get_expects(self): }, "df": { "count": DataFrame( - {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}, + {0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])}, dtype="float64", ), "max": DataFrame( diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 0ba8bbebbfdd9..40a4a59f001a5 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -324,7 +324,7 @@ def test_rolling_axis_count(self, axis_frame): else: expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]}) - result = df.rolling(2, axis=axis_frame).count() + result = df.rolling(2, axis=axis_frame, min_periods=0).count() tm.assert_frame_equal(result, expected) def test_readonly_array(self): @@ -449,7 +449,7 @@ def test_rolling_count_default_min_periods_with_null_values(test_series): # we want to by default produce a valid count even if # there are very few valid entries in the window values = [1, 2, 3, np.nan, 4, 5, 6] - expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0] + expected_counts = [np.nan, np.nan, 3.0, 2.0, 2.0, 2.0, 3.0] if test_series: ser = Series(values) From 4988b4ff5615bce103861c8f575b5f614886f3a8 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 22 Jan 2020 23:07:40 +0800 Subject: [PATCH 06/12] CLN: further cleaned code (GH26996) --- pandas/core/window/rolling.py | 13 ++----------- pandas/tests/window/moments/test_moments_rolling.py | 4 ++-- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 921e59b37d5c4..4f70de07d1e0a 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1182,22 +1182,13 @@ class _Rolling_and_Expanding(_Rolling): def count(self): blocks, obj = self._create_blocks() - - window = self._get_window() - window = min(window, len(obj)) if not self.center else window - - min_periods = self.min_periods - if min_periods is not None and not self.center: - # this is required as window is mutated above - min_periods = min(min_periods, window) - results = [] for b in blocks: result = b.notna().astype(int) result = self._constructor( result, - window=window, - min_periods=min_periods, + window=self._get_window(), + min_periods=self.min_periods, center=self.center, axis=self.axis, closed=self.closed, diff --git a/pandas/tests/window/moments/test_moments_rolling.py b/pandas/tests/window/moments/test_moments_rolling.py index 2de3c25b6d78e..83e4ee25558b5 100644 --- a/pandas/tests/window/moments/test_moments_rolling.py +++ b/pandas/tests/window/moments/test_moments_rolling.py @@ -835,8 +835,8 @@ def get_result(obj, window, min_periods=None, center=False): nan_mask = ~nan_mask tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) else: - result = get_result(self.series, len(self.series) + 1) - expected = get_result(self.series, len(self.series)) + result = get_result(self.series, len(self.series) + 1, min_periods=0) + expected = get_result(self.series, len(self.series), min_periods=0) nan_mask = isna(result) tm.assert_series_equal(nan_mask, isna(expected)) From 1e48ac3540f123e5a2336aa0eab1b645d8ad2ffc Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Fri, 24 Jan 2020 14:20:15 +0800 Subject: [PATCH 07/12] TST: cleaned up tests (GH26996) --- pandas/tests/window/test_rolling.py | 32 +++++++++-------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 40a4a59f001a5..edccec3f8cc6b 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -428,21 +428,16 @@ def test_min_periods1(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("test_series", [True, False]) -def test_rolling_count_with_min_periods(test_series): +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_rolling_count_with_min_periods(constructor): # GH 26996 - if test_series: - result = Series(range(5)).rolling(3, min_periods=3).count() - expected = Series([np.nan, np.nan, 3.0, 3.0, 3.0]) - tm.assert_series_equal(result, expected) - else: - result = DataFrame(range(5)).rolling(3, min_periods=3).count() - expected = DataFrame([np.nan, np.nan, 3.0, 3.0, 3.0]) - tm.assert_frame_equal(result, expected) + result = constructor(range(5)).rolling(3, min_periods=3).count() + expected = constructor([np.nan, np.nan, 3.0, 3.0, 3.0]) + tm.assert_equal(result, expected) -@pytest.mark.parametrize("test_series", [True, False]) -def test_rolling_count_default_min_periods_with_null_values(test_series): +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_rolling_count_default_min_periods_with_null_values(constructor): # GH 26996 # We need rolling count to have default min_periods=0, # as the method is meant to count how many non-null values, @@ -451,13 +446,6 @@ def test_rolling_count_default_min_periods_with_null_values(test_series): values = [1, 2, 3, np.nan, 4, 5, 6] expected_counts = [np.nan, np.nan, 3.0, 2.0, 2.0, 2.0, 3.0] - if test_series: - ser = Series(values) - result = ser.rolling(3).count() - expected = Series(expected_counts) - tm.assert_series_equal(result, expected) - else: - df = DataFrame(values) - result = df.rolling(3).count() - expected = DataFrame(expected_counts) - tm.assert_frame_equal(result, expected) + result = constructor(values).rolling(3).count() + expected = constructor(expected_counts) + tm.assert_equal(result, expected) From c091787eb9dfc95623795efe389811e7efbc9d37 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Fri, 24 Jan 2020 15:28:38 +0800 Subject: [PATCH 08/12] BUG: changed min_periods default to 0 for rolling and expanding (GH26996) --- pandas/_libs/tslibs/timestamps.pyx | 3 +++ pandas/core/window/rolling.py | 2 +- pandas/tests/window/test_dtypes.py | 8 ++++---- pandas/tests/window/test_expanding.py | 20 ++++++++++++++++---- pandas/tests/window/test_rolling.py | 6 +----- 5 files changed, 25 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index abe7f9e5b4105..2cd99f736017b 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -374,11 +374,13 @@ class Timestamp(_Timestamp): # Mixing pydatetime positional and keyword arguments is forbidden! cdef _TSObject ts + print('haha') _date_attributes = [year, month, day, hour, minute, second, microsecond, nanosecond] if tzinfo is not None: + print('tzinfo not None') if not PyTZInfo_Check(tzinfo): # tzinfo must be a datetime.tzinfo object, GH#17690 raise TypeError(f'tzinfo must be a datetime.tzinfo object, ' @@ -390,6 +392,7 @@ class Timestamp(_Timestamp): tz, tzinfo = tzinfo, None if isinstance(ts_input, str): + print('ts_input is str') # User passed a date string to parse. # Check that the user didn't also pass a date attribute kwarg. if any(arg is not None for arg in _date_attributes): diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 4f70de07d1e0a..825a22606ed7f 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1188,7 +1188,7 @@ def count(self): result = self._constructor( result, window=self._get_window(), - min_periods=self.min_periods, + min_periods=self.min_periods if self.min_periods else 0, center=self.center, axis=self.axis, closed=self.closed, diff --git a/pandas/tests/window/test_dtypes.py b/pandas/tests/window/test_dtypes.py index 35f93b1262f59..b1c9b66ab09d3 100644 --- a/pandas/tests/window/test_dtypes.py +++ b/pandas/tests/window/test_dtypes.py @@ -34,7 +34,7 @@ class Dtype: def get_expects(self): expects = { "sr1": { - "count": Series([np.nan, 2, 2, 2, 2], dtype="float64"), + "count": Series([1, 2, 2, 2, 2], dtype="float64"), "max": Series([np.nan, 1, 2, 3, 4], dtype="float64"), "min": Series([np.nan, 0, 1, 2, 3], dtype="float64"), "sum": Series([np.nan, 1, 3, 5, 7], dtype="float64"), @@ -44,7 +44,7 @@ def get_expects(self): "median": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"), }, "sr2": { - "count": Series([np.nan, 2, 2, 2, 2], dtype="float64"), + "count": Series([1, 2, 2, 2, 2], dtype="float64"), "max": Series([np.nan, 10, 8, 6, 4], dtype="float64"), "min": Series([np.nan, 8, 6, 4, 2], dtype="float64"), "sum": Series([np.nan, 18, 14, 10, 6], dtype="float64"), @@ -54,7 +54,7 @@ def get_expects(self): "median": Series([np.nan, 9, 7, 5, 3], dtype="float64"), }, "sr3": { - "count": Series([np.nan, 2, 2, 1, 1], dtype="float64"), + "count": Series([1, 2, 2, 1, 1], dtype="float64"), "max": Series([np.nan, 1, 2, np.nan, np.nan], dtype="float64"), "min": Series([np.nan, 0, 1, np.nan, np.nan], dtype="float64"), "sum": Series([np.nan, 1, 3, np.nan, np.nan], dtype="float64"), @@ -67,7 +67,7 @@ def get_expects(self): }, "df": { "count": DataFrame( - {0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])}, + {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}, dtype="float64", ), "max": DataFrame( diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 58ad20e473560..6b6367fd80b26 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -115,8 +115,20 @@ def test_expanding_axis(self, axis_frame): tm.assert_frame_equal(result, expected) -def test_expanding_count_with_min_periods(): +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_expanding_count_with_min_periods(constructor): # GH 26996 - result = Series(range(5)).expanding(min_periods=3).count() - expected = Series([np.nan, np.nan, 3.0, 4.0, 5.0]) - tm.assert_series_equal(result, expected) + result = constructor(range(5)).expanding(min_periods=3).count() + expected = constructor([np.nan, np.nan, 3.0, 4.0, 5.0]) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_expanding_count_default_min_periods_with_null_values(constructor): + # GH 26996 + values = [1, 2, 3, np.nan, 4, 5, 6] + expected_counts = [1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 6.0] + + result = constructor(values).expanding().count() + expected = constructor(expected_counts) + tm.assert_equal(result, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index edccec3f8cc6b..80a732c6f88df 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -439,12 +439,8 @@ def test_rolling_count_with_min_periods(constructor): @pytest.mark.parametrize("constructor", [Series, DataFrame]) def test_rolling_count_default_min_periods_with_null_values(constructor): # GH 26996 - # We need rolling count to have default min_periods=0, - # as the method is meant to count how many non-null values, - # we want to by default produce a valid count even if - # there are very few valid entries in the window values = [1, 2, 3, np.nan, 4, 5, 6] - expected_counts = [np.nan, np.nan, 3.0, 2.0, 2.0, 2.0, 3.0] + expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0] result = constructor(values).rolling(3).count() expected = constructor(expected_counts) From f905cbbd5452af6c34cbffa52668713bf1177e4e Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Fri, 24 Jan 2020 15:30:38 +0800 Subject: [PATCH 09/12] BUG: reverted non-relevant changes accidentally added (GH26996) --- pandas/_libs/tslibs/timestamps.pyx | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 2cd99f736017b..abe7f9e5b4105 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -374,13 +374,11 @@ class Timestamp(_Timestamp): # Mixing pydatetime positional and keyword arguments is forbidden! cdef _TSObject ts - print('haha') _date_attributes = [year, month, day, hour, minute, second, microsecond, nanosecond] if tzinfo is not None: - print('tzinfo not None') if not PyTZInfo_Check(tzinfo): # tzinfo must be a datetime.tzinfo object, GH#17690 raise TypeError(f'tzinfo must be a datetime.tzinfo object, ' @@ -392,7 +390,6 @@ class Timestamp(_Timestamp): tz, tzinfo = tzinfo, None if isinstance(ts_input, str): - print('ts_input is str') # User passed a date string to parse. # Check that the user didn't also pass a date attribute kwarg. if any(arg is not None for arg in _date_attributes): From 0a27a2175c4a1592a714b3045aa94c51c2f15063 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Sat, 25 Jan 2020 09:40:38 +0800 Subject: [PATCH 10/12] BUG: small change in whatsnew (GH26996) --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 95d2adc491fb2..9d5289ae8f2d0 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1165,7 +1165,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) - Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`) - Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`) -- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument ``min_periods`` ignored (:issue:`26996`) +- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument where ``min_periods`` was ignored (:issue:`26996`) Reshaping ^^^^^^^^^ From 337f1ae98881704875a16deb1230520cb87ec6a9 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Sun, 26 Jan 2020 00:50:10 +0800 Subject: [PATCH 11/12] CLN: slight cleanup (GH26996) --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 825a22606ed7f..c1e34757b45d4 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1188,7 +1188,7 @@ def count(self): result = self._constructor( result, window=self._get_window(), - min_periods=self.min_periods if self.min_periods else 0, + min_periods=self.min_periods or 0, center=self.center, axis=self.axis, closed=self.closed, From 12e1f09b8281915a61b907324b82afa43b8dd94b Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Sun, 26 Jan 2020 10:38:13 +0800 Subject: [PATCH 12/12] removed redundant file --- doc/source/whatsnew/v1.1.0.rst | 168 --------------------------------- 1 file changed, 168 deletions(-) delete mode 100644 doc/source/whatsnew/v1.1.0.rst diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst deleted file mode 100644 index 0a5ac87a0be75..0000000000000 --- a/doc/source/whatsnew/v1.1.0.rst +++ /dev/null @@ -1,168 +0,0 @@ -.. _whatsnew_110: - -What's new in 1.1.0 (??) ------------------------- - -These are the changes in pandas 1.1.0. See :ref:`release` for a full changelog -including other versions of pandas. - -{{ header }} - -.. --------------------------------------------------------------------------- - -Enhancements -~~~~~~~~~~~~ - -.. _whatsnew_110.enhancements.other: - -Other enhancements -^^^^^^^^^^^^^^^^^^ - -- -- - - -.. --------------------------------------------------------------------------- - -.. _whatsnew_110.deprecations: - -Deprecations -~~~~~~~~~~~~ - -- -- - -.. --------------------------------------------------------------------------- - - -.. _whatsnew_110.performance: - -Performance improvements -~~~~~~~~~~~~~~~~~~~~~~~~ - -- -- - -.. --------------------------------------------------------------------------- - -.. _whatsnew_110.bug_fixes: - -Bug fixes -~~~~~~~~~ - - -Categorical -^^^^^^^^^^^ - -- -- - -Datetimelike -^^^^^^^^^^^^ -- Bug in :class:`Timestamp` where constructing :class:`Timestamp` from ambiguous epoch time and calling constructor again changed :meth:`Timestamp.value` property (:issue:`24329`) -- -- - -Timedelta -^^^^^^^^^ - -- -- - -Timezones -^^^^^^^^^ - -- -- - - -Numeric -^^^^^^^ -- -- - -Conversion -^^^^^^^^^^ -- Bug in :class:`Series` construction from NumPy array with big-endian ``datetime64`` dtype (:issue:`29684`) -- -- - -Strings -^^^^^^^ - -- -- - - -Interval -^^^^^^^^ - -- -- - -Indexing -^^^^^^^^ - -- -- - -Missing -^^^^^^^ - -- -- - -MultiIndex -^^^^^^^^^^ - -- -- - -I/O -^^^ - -- -- - -Plotting -^^^^^^^^ - -- -- - -Groupby/resample/rolling -^^^^^^^^^^^^^^^^^^^^^^^^ - -- -- - -Reshaping -^^^^^^^^^ - -- -- Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns is set (:issue:`17038`) - -Sparse -^^^^^^ - -- -- - -ExtensionArray -^^^^^^^^^^^^^^ - -- -- - - -Other -^^^^^ -- -- - -.. --------------------------------------------------------------------------- - -.. _whatsnew_110.contributors: - -Contributors -~~~~~~~~~~~~