From 803a166db2b96a7916b56d5b482c0d37ae2011a3 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 15 Feb 2020 09:49:33 -0600 Subject: [PATCH 01/13] CLN: Clean test_function.py --- pandas/tests/groupby/test_function.py | 102 ++++++++++++-------------- 1 file changed, 48 insertions(+), 54 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 73e36cb5e6c84..92583eb4e4569 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -222,13 +222,13 @@ def test_arg_passthru(): tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index(["int", "float", "string", "category_int", "timedelta"]) - for attr in ["sum"]: - f = getattr(df.groupby("group"), attr) - result = f() - tm.assert_index_equal(result.columns, expected_columns_numeric) - result = f(numeric_only=False) - tm.assert_index_equal(result.columns, expected_columns) + f = getattr(df.groupby("group"), "sum") + result = f() + tm.assert_index_equal(result.columns, expected_columns_numeric) + + result = f(numeric_only=False) + tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index(["int", "float", "category_int"]) for attr in ["prod", "cumprod"]: @@ -253,13 +253,13 @@ def test_arg_passthru(): tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index(["int", "float", "category_int", "timedelta"]) - for attr in ["cumsum"]: - f = getattr(df.groupby("group"), attr) - result = f() - tm.assert_index_equal(result.columns, expected_columns_numeric) - result = f(numeric_only=False) - tm.assert_index_equal(result.columns, expected_columns) + f = getattr(df.groupby("group"), "cumsum") + result = f() + tm.assert_index_equal(result.columns, expected_columns_numeric) + + result = f(numeric_only=False) + tm.assert_index_equal(result.columns, expected_columns) def test_non_cython_api(): @@ -685,59 +685,53 @@ def test_numpy_compat(func): getattr(g, func)(foo=1) -def test_cummin_cummax(): +@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64]) +def test_cummin_cummax(dtype): + min_val = ( + np.iinfo(dtype).min if np.dtype(dtype).kind == "i" else np.finfo(dtype).min + ) + max_val = ( + np.iinfo(dtype).max if np.dtype(dtype).kind == "i" else np.finfo(dtype).max + ) + # GH 15048 - num_types = [np.int32, np.int64, np.float32, np.float64] - num_mins = [ - np.iinfo(np.int32).min, - np.iinfo(np.int64).min, - np.finfo(np.float32).min, - np.finfo(np.float64).min, - ] - num_max = [ - np.iinfo(np.int32).max, - np.iinfo(np.int64).max, - np.finfo(np.float32).max, - np.finfo(np.float64).max, - ] base_df = pd.DataFrame( {"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]} ) expected_mins = [3, 3, 3, 2, 2, 2, 2, 1] expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3] - for dtype, min_val, max_val in zip(num_types, num_mins, num_max): - df = base_df.astype(dtype) + df = base_df.astype(dtype) - # cummin - expected = pd.DataFrame({"B": expected_mins}).astype(dtype) - result = df.groupby("A").cummin() - tm.assert_frame_equal(result, expected) - result = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() - tm.assert_frame_equal(result, expected) + # cummin + expected = pd.DataFrame({"B": expected_mins}).astype(dtype) + result = df.groupby("A").cummin() + tm.assert_frame_equal(result, expected) + result = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(result, expected) - # Test cummin w/ min value for dtype - df.loc[[2, 6], "B"] = min_val - expected.loc[[2, 3, 6, 7], "B"] = min_val - result = df.groupby("A").cummin() - tm.assert_frame_equal(result, expected) - expected = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() - tm.assert_frame_equal(result, expected) + # Test cummin w/ min value for dtype + df.loc[[2, 6], "B"] = min_val + expected.loc[[2, 3, 6, 7], "B"] = min_val + result = df.groupby("A").cummin() + tm.assert_frame_equal(result, expected) + expected = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(result, expected) - # cummax - expected = pd.DataFrame({"B": expected_maxs}).astype(dtype) - result = df.groupby("A").cummax() - tm.assert_frame_equal(result, expected) - result = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() - tm.assert_frame_equal(result, expected) + # cummax + expected = pd.DataFrame({"B": expected_maxs}).astype(dtype) + result = df.groupby("A").cummax() + tm.assert_frame_equal(result, expected) + result = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(result, expected) - # Test cummax w/ max value for dtype - df.loc[[2, 6], "B"] = max_val - expected.loc[[2, 3, 6, 7], "B"] = max_val - result = df.groupby("A").cummax() - tm.assert_frame_equal(result, expected) - expected = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() - tm.assert_frame_equal(result, expected) + # Test cummax w/ max value for dtype + df.loc[[2, 6], "B"] = max_val + expected.loc[[2, 3, 6, 7], "B"] = max_val + result = df.groupby("A").cummax() + tm.assert_frame_equal(result, expected) + expected = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(result, expected) # Test nan in some values base_df.loc[[0, 2, 4, 6], "B"] = np.nan From 5f56e75e7f00fb560dd930239a6619c44c6208a2 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 15 Feb 2020 12:35:55 -0600 Subject: [PATCH 02/13] Move into params --- pandas/tests/groupby/test_function.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 92583eb4e4569..3fa901adf4c62 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -685,15 +685,16 @@ def test_numpy_compat(func): getattr(g, func)(foo=1) -@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64]) -def test_cummin_cummax(dtype): - min_val = ( - np.iinfo(dtype).min if np.dtype(dtype).kind == "i" else np.finfo(dtype).min - ) - max_val = ( - np.iinfo(dtype).max if np.dtype(dtype).kind == "i" else np.finfo(dtype).max - ) - +@pytest.mark.parametrize( + "dtype, min_val, max_val", + [ + (np.int32, np.iinfo(np.int32).min, np.iinfo(np.int32).max), + (np.int64, np.iinfo(np.int64).min, np.iinfo(np.int64).max), + (np.float32, np.finfo(np.float32).min, np.finfo(np.float32).max), + (np.float64, np.finfo(np.float64).min, np.finfo(np.float64).max), + ], +) +def test_cummin_cummax(dtype, min_val, max_val): # GH 15048 base_df = pd.DataFrame( {"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]} From 761c7e641acf65e94e6f97aed9325d35d580bf43 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 15 Feb 2020 16:27:43 -0600 Subject: [PATCH 03/13] Break into two tests --- pandas/tests/groupby/test_function.py | 81 +++++++++++++++++++-------- 1 file changed, 57 insertions(+), 24 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 36f03691332e3..48717cb2feeb6 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -698,24 +698,22 @@ def test_numpy_compat(func): @pytest.mark.xfail( _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" ) -def test_cummin_cummax(dtype, min_val, max_val): +def test_cummin(dtype, min_val, max_val): # GH 15048 base_df = pd.DataFrame( {"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]} ) expected_mins = [3, 3, 3, 2, 2, 2, 2, 1] - expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3] df = base_df.astype(dtype) - # cummin expected = pd.DataFrame({"B": expected_mins}).astype(dtype) result = df.groupby("A").cummin() tm.assert_frame_equal(result, expected) result = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() tm.assert_frame_equal(result, expected) - # Test cummin w/ min value for dtype + # Test w/ min value for dtype df.loc[[2, 6], "B"] = min_val expected.loc[[2, 3, 6, 7], "B"] = min_val result = df.groupby("A").cummin() @@ -723,14 +721,64 @@ def test_cummin_cummax(dtype, min_val, max_val): expected = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() tm.assert_frame_equal(result, expected) - # cummax + # Test nan in some values + base_df.loc[[0, 2, 4, 6], "B"] = np.nan + expected = pd.DataFrame({"B": [np.nan, 4, np.nan, 2, np.nan, 3, np.nan, 1]}) + result = base_df.groupby("A").cummin() + tm.assert_frame_equal(result, expected) + expected = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(result, expected) + + # Test nan in entire column + base_df["B"] = np.nan + expected = pd.DataFrame({"B": [np.nan] * 8}) + result = base_df.groupby("A").cummin() + tm.assert_frame_equal(expected, result) + result = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(expected, result) + + # GH 15561 + df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(["2001"]))) + expected = pd.Series(pd.to_datetime("2001"), index=[0], name="b") + + result = df.groupby("a")["b"].cummin() + tm.assert_series_equal(expected, result) + + # GH 15635 + df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2])) + result = df.groupby("a").b.cummin() + expected = pd.Series([1, 2, 1], name="b") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "dtype, min_val, max_val", + [ + (np.int32, np.iinfo(np.int32).min, np.iinfo(np.int32).max), + (np.int64, np.iinfo(np.int64).min, np.iinfo(np.int64).max), + (np.float32, np.finfo(np.float32).min, np.finfo(np.float32).max), + (np.float64, np.finfo(np.float64).min, np.finfo(np.float64).max), + ], +) +@pytest.mark.xfail( + _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" +) +def test_cummax(dtype, min_val, max_val): + # GH 15048 + base_df = pd.DataFrame( + {"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]} + ) + expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3] + + df = base_df.astype(dtype) + expected = pd.DataFrame({"B": expected_maxs}).astype(dtype) result = df.groupby("A").cummax() tm.assert_frame_equal(result, expected) result = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() tm.assert_frame_equal(result, expected) - # Test cummax w/ max value for dtype + # Test w/ max value for dtype df.loc[[2, 6], "B"] = max_val expected.loc[[2, 3, 6, 7], "B"] = max_val result = df.groupby("A").cummax() @@ -740,12 +788,6 @@ def test_cummin_cummax(dtype, min_val, max_val): # Test nan in some values base_df.loc[[0, 2, 4, 6], "B"] = np.nan - expected = pd.DataFrame({"B": [np.nan, 4, np.nan, 2, np.nan, 3, np.nan, 1]}) - result = base_df.groupby("A").cummin() - tm.assert_frame_equal(result, expected) - expected = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() - tm.assert_frame_equal(result, expected) - expected = pd.DataFrame({"B": [np.nan, 4, np.nan, 4, np.nan, 3, np.nan, 3]}) result = base_df.groupby("A").cummax() tm.assert_frame_equal(result, expected) @@ -755,10 +797,6 @@ def test_cummin_cummax(dtype, min_val, max_val): # Test nan in entire column base_df["B"] = np.nan expected = pd.DataFrame({"B": [np.nan] * 8}) - result = base_df.groupby("A").cummin() - tm.assert_frame_equal(expected, result) - result = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() - tm.assert_frame_equal(expected, result) result = base_df.groupby("A").cummax() tm.assert_frame_equal(expected, result) result = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() @@ -767,9 +805,9 @@ def test_cummin_cummax(dtype, min_val, max_val): # GH 15561 df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(["2001"]))) expected = pd.Series(pd.to_datetime("2001"), index=[0], name="b") - for method in ["cummax", "cummin"]: - result = getattr(df.groupby("a")["b"], method)() - tm.assert_series_equal(expected, result) + + result = df.groupby("a")["b"].cummax() + tm.assert_series_equal(expected, result) # GH 15635 df = pd.DataFrame(dict(a=[1, 2, 1], b=[2, 1, 1])) @@ -777,11 +815,6 @@ def test_cummin_cummax(dtype, min_val, max_val): expected = pd.Series([2, 1, 2], name="b") tm.assert_series_equal(result, expected) - df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2])) - result = df.groupby("a").b.cummin() - expected = pd.Series([1, 2, 1], name="b") - tm.assert_series_equal(result, expected) - @pytest.mark.parametrize( "in_vals, out_vals", From bac1eb5fcfcbd8e51c3a91222d3edca6c8e501a9 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 15 Feb 2020 19:06:00 -0600 Subject: [PATCH 04/13] Remove some more loops --- pandas/tests/reductions/test_reductions.py | 110 +++++++++++---------- 1 file changed, 57 insertions(+), 53 deletions(-) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 0b312fe2f8990..b2d41ed7d04d1 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -66,60 +66,64 @@ def test_ops(self, opname, obj): expected = expected.astype("M8[ns]").astype("int64") assert result.value == expected - def test_nanops(self): + @pytest.mark.parametrize("opname", ["max", "min"]) + @pytest.mark.parametrize("klass", [Index, Series]) + def test_nanops(self, opname, klass): # GH#7261 - for opname in ["max", "min"]: - for klass in [Index, Series]: - arg_op = "arg" + opname if klass is Index else "idx" + opname - - obj = klass([np.nan, 2.0]) - assert getattr(obj, opname)() == 2.0 - - obj = klass([np.nan]) - assert pd.isna(getattr(obj, opname)()) - assert pd.isna(getattr(obj, opname)(skipna=False)) - - obj = klass([], dtype=object) - assert pd.isna(getattr(obj, opname)()) - assert pd.isna(getattr(obj, opname)(skipna=False)) - - obj = klass([pd.NaT, datetime(2011, 11, 1)]) - # check DatetimeIndex monotonic path - assert getattr(obj, opname)() == datetime(2011, 11, 1) - assert getattr(obj, opname)(skipna=False) is pd.NaT - - assert getattr(obj, arg_op)() == 1 - result = getattr(obj, arg_op)(skipna=False) - if klass is Series: - assert np.isnan(result) - else: - assert result == -1 - - obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT]) - # check DatetimeIndex non-monotonic path - assert getattr(obj, opname)(), datetime(2011, 11, 1) - assert getattr(obj, opname)(skipna=False) is pd.NaT - - assert getattr(obj, arg_op)() == 1 - result = getattr(obj, arg_op)(skipna=False) - if klass is Series: - assert np.isnan(result) - else: - assert result == -1 - - for dtype in ["M8[ns]", "datetime64[ns, UTC]"]: - # cases with empty Series/DatetimeIndex - obj = klass([], dtype=dtype) - - assert getattr(obj, opname)() is pd.NaT - assert getattr(obj, opname)(skipna=False) is pd.NaT - - with pytest.raises(ValueError, match="empty sequence"): - getattr(obj, arg_op)() - with pytest.raises(ValueError, match="empty sequence"): - getattr(obj, arg_op)(skipna=False) - - # argmin/max + arg_op = "arg" + opname if klass is Index else "idx" + opname + + obj = klass([np.nan, 2.0]) + assert getattr(obj, opname)() == 2.0 + + obj = klass([np.nan]) + assert pd.isna(getattr(obj, opname)()) + assert pd.isna(getattr(obj, opname)(skipna=False)) + + obj = klass([], dtype=object) + assert pd.isna(getattr(obj, opname)()) + assert pd.isna(getattr(obj, opname)(skipna=False)) + + obj = klass([pd.NaT, datetime(2011, 11, 1)]) + # check DatetimeIndex monotonic path + assert getattr(obj, opname)() == datetime(2011, 11, 1) + assert getattr(obj, opname)(skipna=False) is pd.NaT + + assert getattr(obj, arg_op)() == 1 + result = getattr(obj, arg_op)(skipna=False) + if klass is Series: + assert np.isnan(result) + else: + assert result == -1 + + obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT]) + # check DatetimeIndex non-monotonic path + assert getattr(obj, opname)(), datetime(2011, 11, 1) + assert getattr(obj, opname)(skipna=False) is pd.NaT + + assert getattr(obj, arg_op)() == 1 + result = getattr(obj, arg_op)(skipna=False) + if klass is Series: + assert np.isnan(result) + else: + assert result == -1 + + @pytest.mark.parametrize("opname", ["max", "min"]) + @pytest.mark.parametrize("klass", [Index, Series]) + @pytest.mark.parametrize("dtype", ["M8[ns]", "datetime64[ns, UTC]"]) + def test_nanops_empty_object(self, opname, klass, dtype): + arg_op = "arg" + opname if klass is Index else "idx" + opname + + obj = klass([], dtype=dtype) + + assert getattr(obj, opname)() is pd.NaT + assert getattr(obj, opname)(skipna=False) is pd.NaT + + with pytest.raises(ValueError, match="empty sequence"): + getattr(obj, arg_op)() + with pytest.raises(ValueError, match="empty sequence"): + getattr(obj, arg_op)(skipna=False) + + def test_argminmax(self): obj = Index(np.arange(5, dtype="int64")) assert obj.argmin() == 0 assert obj.argmax() == 4 From 23ad133e3d86ed5b4b1010e623d11cd13a66a62a Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 15 Feb 2020 19:11:34 -0600 Subject: [PATCH 05/13] Break into own test, parametrize --- pandas/tests/reductions/test_reductions.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index b2d41ed7d04d1..8ba3676da68bc 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -228,16 +228,17 @@ def test_minmax_timedelta64(self): assert idx.argmin() == 0 assert idx.argmax() == 2 - for op in ["min", "max"]: - # Return NaT - obj = TimedeltaIndex([]) - assert pd.isna(getattr(obj, op)()) + @pytest.mark.parametrize("op", ["min", "max"]) + def test_minmax_timedelta_empty_or_na(self, op): + # Return NaT + obj = TimedeltaIndex([]) + assert pd.isna(getattr(obj, op)()) - obj = TimedeltaIndex([pd.NaT]) - assert pd.isna(getattr(obj, op)()) + obj = TimedeltaIndex([pd.NaT]) + assert pd.isna(getattr(obj, op)()) - obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) - assert pd.isna(getattr(obj, op)()) + obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) + assert pd.isna(getattr(obj, op)()) def test_numpy_minmax_timedelta64(self): td = timedelta_range("16815 days", "16820 days", freq="D") From 7b2cfcbf4f67ccd67aa0a88127813500c2e1bb0b Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 15 Feb 2020 19:18:21 -0600 Subject: [PATCH 06/13] Revert "Break into own test, parametrize" This reverts commit 23ad133e3d86ed5b4b1010e623d11cd13a66a62a. --- pandas/tests/reductions/test_reductions.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 8ba3676da68bc..b2d41ed7d04d1 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -228,17 +228,16 @@ def test_minmax_timedelta64(self): assert idx.argmin() == 0 assert idx.argmax() == 2 - @pytest.mark.parametrize("op", ["min", "max"]) - def test_minmax_timedelta_empty_or_na(self, op): - # Return NaT - obj = TimedeltaIndex([]) - assert pd.isna(getattr(obj, op)()) + for op in ["min", "max"]: + # Return NaT + obj = TimedeltaIndex([]) + assert pd.isna(getattr(obj, op)()) - obj = TimedeltaIndex([pd.NaT]) - assert pd.isna(getattr(obj, op)()) + obj = TimedeltaIndex([pd.NaT]) + assert pd.isna(getattr(obj, op)()) - obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) - assert pd.isna(getattr(obj, op)()) + obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) + assert pd.isna(getattr(obj, op)()) def test_numpy_minmax_timedelta64(self): td = timedelta_range("16815 days", "16820 days", freq="D") From e8b37c1ef53abeffe9675e35b4f5040abb43cabf Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 15 Feb 2020 19:18:35 -0600 Subject: [PATCH 07/13] Revert "Remove some more loops" This reverts commit bac1eb5fcfcbd8e51c3a91222d3edca6c8e501a9. --- pandas/tests/reductions/test_reductions.py | 110 ++++++++++----------- 1 file changed, 53 insertions(+), 57 deletions(-) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index b2d41ed7d04d1..0b312fe2f8990 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -66,64 +66,60 @@ def test_ops(self, opname, obj): expected = expected.astype("M8[ns]").astype("int64") assert result.value == expected - @pytest.mark.parametrize("opname", ["max", "min"]) - @pytest.mark.parametrize("klass", [Index, Series]) - def test_nanops(self, opname, klass): + def test_nanops(self): # GH#7261 - arg_op = "arg" + opname if klass is Index else "idx" + opname - - obj = klass([np.nan, 2.0]) - assert getattr(obj, opname)() == 2.0 - - obj = klass([np.nan]) - assert pd.isna(getattr(obj, opname)()) - assert pd.isna(getattr(obj, opname)(skipna=False)) - - obj = klass([], dtype=object) - assert pd.isna(getattr(obj, opname)()) - assert pd.isna(getattr(obj, opname)(skipna=False)) - - obj = klass([pd.NaT, datetime(2011, 11, 1)]) - # check DatetimeIndex monotonic path - assert getattr(obj, opname)() == datetime(2011, 11, 1) - assert getattr(obj, opname)(skipna=False) is pd.NaT - - assert getattr(obj, arg_op)() == 1 - result = getattr(obj, arg_op)(skipna=False) - if klass is Series: - assert np.isnan(result) - else: - assert result == -1 - - obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT]) - # check DatetimeIndex non-monotonic path - assert getattr(obj, opname)(), datetime(2011, 11, 1) - assert getattr(obj, opname)(skipna=False) is pd.NaT - - assert getattr(obj, arg_op)() == 1 - result = getattr(obj, arg_op)(skipna=False) - if klass is Series: - assert np.isnan(result) - else: - assert result == -1 - - @pytest.mark.parametrize("opname", ["max", "min"]) - @pytest.mark.parametrize("klass", [Index, Series]) - @pytest.mark.parametrize("dtype", ["M8[ns]", "datetime64[ns, UTC]"]) - def test_nanops_empty_object(self, opname, klass, dtype): - arg_op = "arg" + opname if klass is Index else "idx" + opname - - obj = klass([], dtype=dtype) - - assert getattr(obj, opname)() is pd.NaT - assert getattr(obj, opname)(skipna=False) is pd.NaT - - with pytest.raises(ValueError, match="empty sequence"): - getattr(obj, arg_op)() - with pytest.raises(ValueError, match="empty sequence"): - getattr(obj, arg_op)(skipna=False) - - def test_argminmax(self): + for opname in ["max", "min"]: + for klass in [Index, Series]: + arg_op = "arg" + opname if klass is Index else "idx" + opname + + obj = klass([np.nan, 2.0]) + assert getattr(obj, opname)() == 2.0 + + obj = klass([np.nan]) + assert pd.isna(getattr(obj, opname)()) + assert pd.isna(getattr(obj, opname)(skipna=False)) + + obj = klass([], dtype=object) + assert pd.isna(getattr(obj, opname)()) + assert pd.isna(getattr(obj, opname)(skipna=False)) + + obj = klass([pd.NaT, datetime(2011, 11, 1)]) + # check DatetimeIndex monotonic path + assert getattr(obj, opname)() == datetime(2011, 11, 1) + assert getattr(obj, opname)(skipna=False) is pd.NaT + + assert getattr(obj, arg_op)() == 1 + result = getattr(obj, arg_op)(skipna=False) + if klass is Series: + assert np.isnan(result) + else: + assert result == -1 + + obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT]) + # check DatetimeIndex non-monotonic path + assert getattr(obj, opname)(), datetime(2011, 11, 1) + assert getattr(obj, opname)(skipna=False) is pd.NaT + + assert getattr(obj, arg_op)() == 1 + result = getattr(obj, arg_op)(skipna=False) + if klass is Series: + assert np.isnan(result) + else: + assert result == -1 + + for dtype in ["M8[ns]", "datetime64[ns, UTC]"]: + # cases with empty Series/DatetimeIndex + obj = klass([], dtype=dtype) + + assert getattr(obj, opname)() is pd.NaT + assert getattr(obj, opname)(skipna=False) is pd.NaT + + with pytest.raises(ValueError, match="empty sequence"): + getattr(obj, arg_op)() + with pytest.raises(ValueError, match="empty sequence"): + getattr(obj, arg_op)(skipna=False) + + # argmin/max obj = Index(np.arange(5, dtype="int64")) assert obj.argmin() == 0 assert obj.argmax() == 4 From fd84996ce104e54df379e85302f9f71c652562cd Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 16 Feb 2020 11:44:49 -0600 Subject: [PATCH 08/13] Call functions directly --- pandas/tests/groupby/test_function.py | 35 +++++++++++---------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 48717cb2feeb6..748fefea7c846 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -174,11 +174,10 @@ def test_arg_passthru(): ) for attr in ["mean", "median"]: - f = getattr(df.groupby("group"), attr) - result = f() + result = getattr(df.groupby("group"), attr)() tm.assert_index_equal(result.columns, expected_columns_numeric) - result = f(numeric_only=False) + result = getattr(df.groupby("group"), attr)(numeric_only=False) tm.assert_frame_equal(result.reindex_like(expected), expected) # TODO: min, max *should* handle @@ -195,11 +194,10 @@ def test_arg_passthru(): ] ) for attr in ["min", "max"]: - f = getattr(df.groupby("group"), attr) - result = f() + result = getattr(df.groupby("group"), attr)() tm.assert_index_equal(result.columns, expected_columns) - result = f(numeric_only=False) + result = getattr(df.groupby("group"), attr)(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index( @@ -215,29 +213,26 @@ def test_arg_passthru(): ] ) for attr in ["first", "last"]: - f = getattr(df.groupby("group"), attr) - result = f() + result = getattr(df.groupby("group"), attr)() tm.assert_index_equal(result.columns, expected_columns) - result = f(numeric_only=False) + result = getattr(df.groupby("group"), attr)(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index(["int", "float", "string", "category_int", "timedelta"]) - f = getattr(df.groupby("group"), "sum") - result = f() + result = df.groupby("group").sum() tm.assert_index_equal(result.columns, expected_columns_numeric) - result = f(numeric_only=False) + result = df.groupby("group").sum(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index(["int", "float", "category_int"]) for attr in ["prod", "cumprod"]: - f = getattr(df.groupby("group"), attr) - result = f() + result = getattr(df.groupby("group"), attr)() tm.assert_index_equal(result.columns, expected_columns_numeric) - result = f(numeric_only=False) + result = getattr(df.groupby("group"), attr)(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) # like min, max, but don't include strings @@ -245,21 +240,19 @@ def test_arg_passthru(): ["int", "float", "category_int", "datetime", "datetimetz", "timedelta"] ) for attr in ["cummin", "cummax"]: - f = getattr(df.groupby("group"), attr) - result = f() + result = getattr(df.groupby("group"), attr)() # GH 15561: numeric_only=False set by default like min/max tm.assert_index_equal(result.columns, expected_columns) - result = f(numeric_only=False) + result = getattr(df.groupby("group"), attr)(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index(["int", "float", "category_int", "timedelta"]) - f = getattr(df.groupby("group"), "cumsum") - result = f() + result = getattr(df.groupby("group"), "cumsum")() tm.assert_index_equal(result.columns, expected_columns_numeric) - result = f(numeric_only=False) + result = getattr(df.groupby("group"), "cumsum")(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) From c3bc0936faf41e2e101f55049ebcb36225bac9bd Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 16 Feb 2020 15:36:10 -0600 Subject: [PATCH 09/13] Make fixture and split out more tests --- pandas/tests/groupby/test_function.py | 92 +++++++++++++++++---------- 1 file changed, 57 insertions(+), 35 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 748fefea7c846..cfcf2c3aa6387 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -26,6 +26,28 @@ from pandas.util import _test_decorators as td +@pytest.fixture( + params=[np.int32, np.int64, np.float32, np.float64], + ids=["np.int32", "np.int64", "np.float32", "np.float64"], +) +def numpy_dtypes_min_max(request): + """ + Fixture of numpy dtypes with min and max values used for testing nanops + """ + dtype = request.param + dct = { + "dtype": dtype, + "min_val": np.iinfo(dtype).min + if np.dtype(dtype).kind == "i" + else np.finfo(dtype).min, + "max_val": np.iinfo(dtype).max + if np.dtype(dtype).kind == "i" + else np.finfo(dtype).max, + } + + return dct + + @pytest.mark.parametrize("agg_func", ["any", "all"]) @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize( @@ -679,19 +701,14 @@ def test_numpy_compat(func): getattr(g, func)(foo=1) -@pytest.mark.parametrize( - "dtype, min_val, max_val", - [ - (np.int32, np.iinfo(np.int32).min, np.iinfo(np.int32).max), - (np.int64, np.iinfo(np.int64).min, np.iinfo(np.int64).max), - (np.float32, np.finfo(np.float32).min, np.finfo(np.float32).max), - (np.float64, np.finfo(np.float64).min, np.finfo(np.float64).max), - ], -) @pytest.mark.xfail( _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" ) -def test_cummin(dtype, min_val, max_val): +def test_cummin(numpy_dtypes_min_max): + dtype = numpy_dtypes_min_max["dtype"] + min_val = numpy_dtypes_min_max["min_val"] + max_val = numpy_dtypes_min_max["max_val"] + # GH 15048 base_df = pd.DataFrame( {"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]} @@ -722,14 +739,6 @@ def test_cummin(dtype, min_val, max_val): expected = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() tm.assert_frame_equal(result, expected) - # Test nan in entire column - base_df["B"] = np.nan - expected = pd.DataFrame({"B": [np.nan] * 8}) - result = base_df.groupby("A").cummin() - tm.assert_frame_equal(expected, result) - result = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() - tm.assert_frame_equal(expected, result) - # GH 15561 df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(["2001"]))) expected = pd.Series(pd.to_datetime("2001"), index=[0], name="b") @@ -744,19 +753,27 @@ def test_cummin(dtype, min_val, max_val): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize( - "dtype, min_val, max_val", - [ - (np.int32, np.iinfo(np.int32).min, np.iinfo(np.int32).max), - (np.int64, np.iinfo(np.int64).min, np.iinfo(np.int64).max), - (np.float32, np.finfo(np.float32).min, np.finfo(np.float32).max), - (np.float64, np.finfo(np.float64).min, np.finfo(np.float64).max), - ], +@pytest.mark.xfail( + _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" ) +def test_cummin_all_nan_column(): + base_df = pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) + + expected = pd.DataFrame({"B": [np.nan] * 8}) + result = base_df.groupby("A").cummin() + tm.assert_frame_equal(expected, result) + result = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(expected, result) + + @pytest.mark.xfail( _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" ) -def test_cummax(dtype, min_val, max_val): +def test_cummax(numpy_dtypes_min_max): + dtype = numpy_dtypes_min_max["dtype"] + min_val = numpy_dtypes_min_max["min_val"] + max_val = numpy_dtypes_min_max["max_val"] + # GH 15048 base_df = pd.DataFrame( {"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]} @@ -787,14 +804,6 @@ def test_cummax(dtype, min_val, max_val): expected = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() tm.assert_frame_equal(result, expected) - # Test nan in entire column - base_df["B"] = np.nan - expected = pd.DataFrame({"B": [np.nan] * 8}) - result = base_df.groupby("A").cummax() - tm.assert_frame_equal(expected, result) - result = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() - tm.assert_frame_equal(expected, result) - # GH 15561 df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(["2001"]))) expected = pd.Series(pd.to_datetime("2001"), index=[0], name="b") @@ -809,6 +818,19 @@ def test_cummax(dtype, min_val, max_val): tm.assert_series_equal(result, expected) +@pytest.mark.xfail( + _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" +) +def test_cummax_all_nan_column(): + base_df = pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) + + expected = pd.DataFrame({"B": [np.nan] * 8}) + result = base_df.groupby("A").cummax() + tm.assert_frame_equal(expected, result) + result = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( "in_vals, out_vals", [ From 6a01a6dcf93f26fbf537158529a8bc24a628af91 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 16 Feb 2020 15:37:42 -0600 Subject: [PATCH 10/13] Fix --- pandas/tests/groupby/test_function.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index cfcf2c3aa6387..dfb1439ae4425 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -32,7 +32,8 @@ ) def numpy_dtypes_min_max(request): """ - Fixture of numpy dtypes with min and max values used for testing nanops + Fixture of numpy dtypes with min and max values used for testing + cummin and cummax """ dtype = request.param dct = { From 4e02e0761ae6017ecd41eb9368ce5412e49ba9c9 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 16 Feb 2020 16:11:13 -0600 Subject: [PATCH 11/13] Update fixture --- pandas/tests/groupby/test_function.py | 33 ++++++++++----------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index dfb1439ae4425..1b0e161a3f81f 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -30,23 +30,12 @@ params=[np.int32, np.int64, np.float32, np.float64], ids=["np.int32", "np.int64", "np.float32", "np.float64"], ) -def numpy_dtypes_min_max(request): +def numpy_dtypes(request): """ Fixture of numpy dtypes with min and max values used for testing cummin and cummax """ - dtype = request.param - dct = { - "dtype": dtype, - "min_val": np.iinfo(dtype).min - if np.dtype(dtype).kind == "i" - else np.finfo(dtype).min, - "max_val": np.iinfo(dtype).max - if np.dtype(dtype).kind == "i" - else np.finfo(dtype).max, - } - - return dct + return request.param @pytest.mark.parametrize("agg_func", ["any", "all"]) @@ -705,10 +694,11 @@ def test_numpy_compat(func): @pytest.mark.xfail( _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" ) -def test_cummin(numpy_dtypes_min_max): - dtype = numpy_dtypes_min_max["dtype"] - min_val = numpy_dtypes_min_max["min_val"] - max_val = numpy_dtypes_min_max["max_val"] +def test_cummin(numpy_dtypes): + dtype = numpy_dtypes + min_val = ( + np.iinfo(dtype).min if np.dtype(dtype).kind == "i" else np.finfo(dtype).min + ) # GH 15048 base_df = pd.DataFrame( @@ -770,10 +760,11 @@ def test_cummin_all_nan_column(): @pytest.mark.xfail( _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" ) -def test_cummax(numpy_dtypes_min_max): - dtype = numpy_dtypes_min_max["dtype"] - min_val = numpy_dtypes_min_max["min_val"] - max_val = numpy_dtypes_min_max["max_val"] +def test_cummax(numpy_dtypes): + dtype = numpy_dtypes + max_val = ( + np.iinfo(dtype).max if np.dtype(dtype).kind == "i" else np.finfo(dtype).max + ) # GH 15048 base_df = pd.DataFrame( From a00df9ae824960ac83a394c95393b70d691878f4 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sun, 16 Feb 2020 18:05:24 -0600 Subject: [PATCH 12/13] xfail not strict --- pandas/tests/groupby/test_function.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 0aa9b9a77600e..b09fb012937b4 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -747,7 +747,9 @@ def test_cummin(numpy_dtypes): @pytest.mark.xfail( - _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" + _is_numpy_dev, + reason="https://github.com/pandas-dev/pandas/issues/31992", + strict=False, ) def test_cummin_all_nan_column(): base_df = pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) @@ -760,7 +762,9 @@ def test_cummin_all_nan_column(): @pytest.mark.xfail( - _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" + _is_numpy_dev, + reason="https://github.com/pandas-dev/pandas/issues/31992", + strict=False, ) def test_cummax(numpy_dtypes): dtype = numpy_dtypes @@ -813,7 +817,9 @@ def test_cummax(numpy_dtypes): @pytest.mark.xfail( - _is_numpy_dev, reason="https://github.com/pandas-dev/pandas/issues/31992" + _is_numpy_dev, + reason="https://github.com/pandas-dev/pandas/issues/31992", + strict=False, ) def test_cummax_all_nan_column(): base_df = pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) From 7cf1ae74f60bdf7bc60f400a34912c90902230f0 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 17 Feb 2020 12:05:47 -0600 Subject: [PATCH 13/13] Update fixture --- pandas/tests/groupby/test_function.py | 28 +++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index b09fb012937b4..6205dfb87bbd0 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -30,12 +30,20 @@ params=[np.int32, np.int64, np.float32, np.float64], ids=["np.int32", "np.int64", "np.float32", "np.float64"], ) -def numpy_dtypes(request): +def numpy_dtypes_for_minmax(request): """ Fixture of numpy dtypes with min and max values used for testing cummin and cummax """ - return request.param + dtype = request.param + min_val = ( + np.iinfo(dtype).min if np.dtype(dtype).kind == "i" else np.finfo(dtype).min + ) + max_val = ( + np.iinfo(dtype).max if np.dtype(dtype).kind == "i" else np.finfo(dtype).max + ) + + return (dtype, min_val, max_val) @pytest.mark.parametrize("agg_func", ["any", "all"]) @@ -696,11 +704,9 @@ def test_numpy_compat(func): reason="https://github.com/pandas-dev/pandas/issues/31992", strict=False, ) -def test_cummin(numpy_dtypes): - dtype = numpy_dtypes - min_val = ( - np.iinfo(dtype).min if np.dtype(dtype).kind == "i" else np.finfo(dtype).min - ) +def test_cummin(numpy_dtypes_for_minmax): + dtype = numpy_dtypes_for_minmax[0] + min_val = numpy_dtypes_for_minmax[1] # GH 15048 base_df = pd.DataFrame( @@ -766,11 +772,9 @@ def test_cummin_all_nan_column(): reason="https://github.com/pandas-dev/pandas/issues/31992", strict=False, ) -def test_cummax(numpy_dtypes): - dtype = numpy_dtypes - max_val = ( - np.iinfo(dtype).max if np.dtype(dtype).kind == "i" else np.finfo(dtype).max - ) +def test_cummax(numpy_dtypes_for_minmax): + dtype = numpy_dtypes_for_minmax[0] + max_val = numpy_dtypes_for_minmax[2] # GH 15048 base_df = pd.DataFrame(