From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 3 Dec 2018 17:43:52 +0100 Subject: [PATCH 1/5] remove \n from docstring --- pandas/core/arrays/datetimes.py | 26 +++++++++++++------------- pandas/core/arrays/timedeltas.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..b3df505d56d78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -82,7 +82,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -1072,19 +1072,19 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") - year = _field_accessor('year', 'Y', "\n The year of the datetime\n") + year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', - "\n The month as January=1, December=12 \n") - day = _field_accessor('day', 'D', "\nThe days of the datetime\n") - hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n") - minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n") - second = _field_accessor('second', 's', "\nThe seconds of the datetime\n") + "The month as January=1, December=12") + day = _field_accessor('day', 'D', "The days of the datetime") + hour = _field_accessor('hour', 'h', "The hours of the datetime") + minute = _field_accessor('minute', 'm', "The minutes of the datetime") + second = _field_accessor('second', 's', "The seconds of the datetime") microsecond = _field_accessor('microsecond', 'us', - "\nThe microseconds of the datetime\n") + "The microseconds of the datetime") nanosecond = _field_accessor('nanosecond', 'ns', - "\nThe nanoseconds of the datetime\n") + "The nanoseconds of the datetime") weekofyear = _field_accessor('weekofyear', 'woy', - "\nThe week ordinal of the year\n") + "The week ordinal of the year") 
week = weekofyear _dayofweek_doc = """ The day of the week with Monday=0, Sunday=6. @@ -1129,12 +1129,12 @@ def date(self): "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0") dayofyear = _field_accessor('dayofyear', 'doy', - "\nThe ordinal day of the year\n") - quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n") + "The ordinal day of the year") + quarter = _field_accessor('quarter', 'q', "The quarter of the date") days_in_month = _field_accessor( 'days_in_month', 'dim', - "\nThe number of days in the month\n") + "The number of days in the month") daysinmonth = days_in_month _is_month_doc = """ Indicates whether the date is the {first_or_last} day of the month. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 830283d31a929..4afc9f5483c2a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -59,7 +59,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -684,16 +684,16 @@ def to_pytimedelta(self): return tslibs.ints_to_pytimedelta(self.asi8) days = _field_accessor("days", "days", - "\nNumber of days for each element.\n") + "Number of days for each element.") seconds = _field_accessor("seconds", "seconds", - "\nNumber of seconds (>= 0 and less than 1 day) " - "for each element.\n") + "Number of seconds (>= 0 and less than 1 day) " + "for each element.") microseconds = _field_accessor("microseconds", "microseconds", - "\nNumber of microseconds (>= 0 and less " - "than 1 second) for each element.\n") + "Number of microseconds (>= 0 and less " + "than 1 second) for each element.") nanoseconds = _field_accessor("nanoseconds", "nanoseconds", - "\nNumber of nanoseconds (>= 0 and less " - "than 1 microsecond) for each element.\n") + "Number of nanoseconds (>= 0 and less " + "than 1 microsecond) for each element.") @property def components(self): From 
a73fda68cccc8e24989f7e7c1eaf13c3706972aa Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 9 Nov 2019 09:37:43 +0100 Subject: [PATCH 2/5] Add tests for named tuples in MultiIndex columns cases --- .../tests/groupby/aggregate/test_aggregate.py | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index c03ffe317083c..99570f39604e0 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -494,6 +494,68 @@ def test_mangled(self): ) tm.assert_frame_equal(result, expected) + def test_agg_relabel_multiindex_column(self): + # GH 29422, add tests for multiindex column cases + df = pd.DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + idx = pd.Index(["a", "b"], name=("x", "group")) + + result = df.groupby(("x", "group")).agg(a_max=(("y", "A"), "max")) + expected = pd.DataFrame({"a_max": [1, 3]}, index=idx) + tm.assert_frame_equal(result, expected) + + # multiple columns, and different agg methods + result = df.groupby(("x", "group")).agg( + a_max=(("y", "A"), "max"), + a_min=(("y", "A"), np.min), + b_mean=(("y", "B"), "mean"), + ) + expected = pd.DataFrame( + {"a_max": [1, 3], "a_min": [0, 2], "b_mean": [6, 7]}, index=idx + ) + tm.assert_frame_equal(result, expected) + + # multiple colums, with lamdba being used + result = df.groupby(("x", "group")).agg( + a_max=(("y", "A"), lambda x: max(x)), + a_const=(("y", "A"), lambda x: 1), + b_mean=(("y", "B"), "mean"), + ) + expected = pd.DataFrame( + {"a_max": [1, 3], "a_const": [1, 1], "b_mean": [6, 7]}, index=idx + ) + tm.assert_frame_equal(result, expected) + + def test_agg_relabel_multiindex_raises(self): + # GH 29422, add tests for raises senarios in multiindex column cases + df = pd.DataFrame( + {"group": ["a", "a", "b", "b"], "A": 
[0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + + with pytest.raises(KeyError, match="does not exist"): + df.groupby(("x", "group")).agg(a=(("Y", "a"), "max")) + + with pytest.raises(SpecificationError, match="Function names"): + df.groupby(("x", "group")).agg(a=(("y", "A"), "min"), b=(("y", "A"), "min")) + + def test_namedagg_multiindex_column(self): + # GH 29422, add tests for namedagg in multiindex column cases + df = pd.DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + idx = pd.Index(["a", "b"], name=("x", "group")) + + result = df.groupby(("x", "group")).agg( + a_max=pd.NamedAgg(("y", "A"), "max"), + b_mean=pd.NamedAgg(("y", "B"), np.mean), + ) + expected = pd.DataFrame({"a_max": [1, 3], "b_mean": [6, 7]}, index=idx) + tm.assert_frame_equal(result, expected) + def myfunc(s): return np.percentile(s, q=0.90) From 8b2c654909bd4c520d408eb051564afcfec0f0c4 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 9 Nov 2019 09:43:31 +0100 Subject: [PATCH 3/5] fix typo --- pandas/tests/groupby/aggregate/test_aggregate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 99570f39604e0..5701ea7e60398 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -513,7 +513,7 @@ def test_agg_relabel_multiindex_column(self): b_mean=(("y", "B"), "mean"), ) expected = pd.DataFrame( - {"a_max": [1, 3], "a_min": [0, 2], "b_mean": [6, 7]}, index=idx + {"a_max": [1, 3], "a_min": [0, 2], "b_mean": [5.5, 7.5]}, index=idx ) tm.assert_frame_equal(result, expected) @@ -524,7 +524,7 @@ def test_agg_relabel_multiindex_column(self): b_mean=(("y", "B"), "mean"), ) expected = pd.DataFrame( - {"a_max": [1, 3], "a_const": 
[1, 1], "b_mean": [6, 7]}, index=idx + {"a_max": [1, 3], "a_const": [1, 1], "b_mean": [5.5, 7.5]}, index=idx ) tm.assert_frame_equal(result, expected) @@ -553,7 +553,7 @@ def test_namedagg_multiindex_column(self): a_max=pd.NamedAgg(("y", "A"), "max"), b_mean=pd.NamedAgg(("y", "B"), np.mean), ) - expected = pd.DataFrame({"a_max": [1, 3], "b_mean": [6, 7]}, index=idx) + expected = pd.DataFrame({"a_max": [1, 3], "b_mean": [5.5, 7.5]}, index=idx) tm.assert_frame_equal(result, expected) From 8ed17ac1e6aa544a2dc7fe1f612a74708826026e Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 9 Nov 2019 13:53:03 +0100 Subject: [PATCH 4/5] code change based on review --- .../tests/groupby/aggregate/test_aggregate.py | 108 +++++++++--------- 1 file changed, 55 insertions(+), 53 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 5701ea7e60398..2f51a31579562 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -494,67 +494,69 @@ def test_mangled(self): ) tm.assert_frame_equal(result, expected) - def test_agg_relabel_multiindex_column(self): - # GH 29422, add tests for multiindex column cases - df = pd.DataFrame( - {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} - ) - df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) - idx = pd.Index(["a", "b"], name=("x", "group")) - result = df.groupby(("x", "group")).agg(a_max=(("y", "A"), "max")) - expected = pd.DataFrame({"a_max": [1, 3]}, index=idx) - tm.assert_frame_equal(result, expected) +def test_agg_relabel_multiindex_column(): + # GH 29422, add tests for multiindex column cases + df = DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + idx = pd.Index(["a", "b"], name=("x", "group")) - # multiple columns, and different agg 
methods - result = df.groupby(("x", "group")).agg( - a_max=(("y", "A"), "max"), - a_min=(("y", "A"), np.min), - b_mean=(("y", "B"), "mean"), - ) - expected = pd.DataFrame( - {"a_max": [1, 3], "a_min": [0, 2], "b_mean": [5.5, 7.5]}, index=idx - ) - tm.assert_frame_equal(result, expected) + result = df.groupby(("x", "group")).agg(a_max=(("y", "A"), "max")) + expected = DataFrame({"a_max": [1, 3]}, index=idx) + tm.assert_frame_equal(result, expected) - # multiple colums, with lamdba being used - result = df.groupby(("x", "group")).agg( - a_max=(("y", "A"), lambda x: max(x)), - a_const=(("y", "A"), lambda x: 1), - b_mean=(("y", "B"), "mean"), - ) - expected = pd.DataFrame( - {"a_max": [1, 3], "a_const": [1, 1], "b_mean": [5.5, 7.5]}, index=idx - ) - tm.assert_frame_equal(result, expected) + # multiple columns, and different agg methods + result = df.groupby(("x", "group")).agg( + a_max=(("y", "A"), "max"), + a_min=(("y", "A"), np.min), + b_mean=(("y", "B"), "mean"), + ) + expected = DataFrame( + {"a_max": [1, 3], "a_min": [0, 2], "b_mean": [5.5, 7.5]}, index=idx + ) + tm.assert_frame_equal(result, expected) - def test_agg_relabel_multiindex_raises(self): - # GH 29422, add tests for raises senarios in multiindex column cases - df = pd.DataFrame( - {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} - ) - df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + # multiple colums, with lamdba being used + result = df.groupby(("x", "group")).agg( + a_max=(("y", "A"), lambda x: max(x)), + a_const=(("y", "A"), lambda x: 1), + b_mean=(("y", "B"), "mean"), + ) + expected = DataFrame( + {"a_max": [1, 3], "a_const": [1, 1], "b_mean": [5.5, 7.5]}, index=idx + ) + tm.assert_frame_equal(result, expected) - with pytest.raises(KeyError, match="does not exist"): - df.groupby(("x", "group")).agg(a=(("Y", "a"), "max")) - with pytest.raises(SpecificationError, match="Function names"): - df.groupby(("x", "group")).agg(a=(("y", "A"), "min"), 
b=(("y", "A"), "min")) +def test_agg_relabel_multiindex_raises(): + # GH 29422, add tests for raises senarios in multiindex column cases + df = DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) - def test_namedagg_multiindex_column(self): - # GH 29422, add tests for namedagg in multiindex column cases - df = pd.DataFrame( - {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} - ) - df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) - idx = pd.Index(["a", "b"], name=("x", "group")) + with pytest.raises(KeyError, match="does not exist"): + df.groupby(("x", "group")).agg(a=(("Y", "a"), "max")) - result = df.groupby(("x", "group")).agg( - a_max=pd.NamedAgg(("y", "A"), "max"), - b_mean=pd.NamedAgg(("y", "B"), np.mean), - ) - expected = pd.DataFrame({"a_max": [1, 3], "b_mean": [5.5, 7.5]}, index=idx) - tm.assert_frame_equal(result, expected) + with pytest.raises(SpecificationError, match="Function names"): + df.groupby(("x", "group")).agg(a=(("y", "A"), "min"), b=(("y", "A"), "min")) + + +def test_named_agg_multiindex_column(): + # GH 29422, add tests for namedagg in multiindex column cases + df = DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + idx = pd.Index(["a", "b"], name=("x", "group")) + + result = df.groupby(("x", "group")).agg( + a_max=pd.NamedAgg(("y", "A"), "max"), b_mean=pd.NamedAgg(("y", "B"), np.mean) + ) + expected = DataFrame({"a_max": [1, 3], "b_mean": [5.5, 7.5]}, index=idx) + tm.assert_frame_equal(result, expected) def myfunc(s): From 7e138de68c2b608b17c225c4c4eabd4001856093 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 11 Nov 2019 20:49:46 +0100 Subject: [PATCH 5/5] code change based on review --- .../tests/groupby/aggregate/test_aggregate.py | 70 
+++++++++++-------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 2f51a31579562..4313b52798c6e 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -495,7 +495,38 @@ def test_mangled(self): tm.assert_frame_equal(result, expected) -def test_agg_relabel_multiindex_column(): +@pytest.mark.parametrize( + "agg_col1, agg_col2, agg_col3, agg_result1, agg_result2, agg_result3", + [ + ( + (("y", "A"), "max"), + (("y", "A"), np.min), + (("y", "B"), "mean"), + [1, 3], + [0, 2], + [5.5, 7.5], + ), + ( + (("y", "A"), lambda x: max(x)), + (("y", "A"), lambda x: 1), + (("y", "B"), "mean"), + [1, 3], + [1, 1], + [5.5, 7.5], + ), + ( + pd.NamedAgg(("y", "A"), "max"), + pd.NamedAgg(("y", "B"), np.mean), + pd.NamedAgg(("y", "A"), lambda x: 1), + [1, 3], + [5.5, 7.5], + [1, 1], + ), + ], +) +def test_agg_relabel_multiindex_column( + agg_col1, agg_col2, agg_col3, agg_result1, agg_result2, agg_result3 +): # GH 29422, add tests for multiindex column cases df = DataFrame( {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} @@ -507,31 +538,17 @@ def test_agg_relabel_multiindex_column(): expected = DataFrame({"a_max": [1, 3]}, index=idx) tm.assert_frame_equal(result, expected) - # multiple columns, and different agg methods - result = df.groupby(("x", "group")).agg( - a_max=(("y", "A"), "max"), - a_min=(("y", "A"), np.min), - b_mean=(("y", "B"), "mean"), - ) - expected = DataFrame( - {"a_max": [1, 3], "a_min": [0, 2], "b_mean": [5.5, 7.5]}, index=idx - ) - tm.assert_frame_equal(result, expected) - - # multiple colums, with lamdba being used result = df.groupby(("x", "group")).agg( - a_max=(("y", "A"), lambda x: max(x)), - a_const=(("y", "A"), lambda x: 1), - b_mean=(("y", "B"), "mean"), + col_1=agg_col1, col_2=agg_col2, col_3=agg_col3 ) expected = DataFrame( - {"a_max": [1, 3], 
"a_const": [1, 1], "b_mean": [5.5, 7.5]}, index=idx + {"col_1": agg_result1, "col_2": agg_result2, "col_3": agg_result3}, index=idx ) tm.assert_frame_equal(result, expected) -def test_agg_relabel_multiindex_raises(): - # GH 29422, add tests for raises senarios in multiindex column cases +def test_agg_relabel_multiindex_raises_not_exist(): + # GH 29422, add test for raises scenario when aggregate column does not exist df = DataFrame( {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} ) @@ -540,23 +557,16 @@ def test_agg_relabel_multiindex_raises(): with pytest.raises(KeyError, match="does not exist"): df.groupby(("x", "group")).agg(a=(("Y", "a"), "max")) - with pytest.raises(SpecificationError, match="Function names"): - df.groupby(("x", "group")).agg(a=(("y", "A"), "min"), b=(("y", "A"), "min")) - -def test_named_agg_multiindex_column(): - # GH 29422, add tests for namedagg in multiindex column cases +def test_agg_relabel_multiindex_raises_duplicate(): + # GH 29422, add test for raises scenario when getting duplicates df = DataFrame( {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} ) df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) - idx = pd.Index(["a", "b"], name=("x", "group")) - result = df.groupby(("x", "group")).agg( - a_max=pd.NamedAgg(("y", "A"), "max"), b_mean=pd.NamedAgg(("y", "B"), np.mean) - ) - expected = DataFrame({"a_max": [1, 3], "b_mean": [5.5, 7.5]}, index=idx) - tm.assert_frame_equal(result, expected) + with pytest.raises(SpecificationError, match="Function names"): + df.groupby(("x", "group")).agg(a=(("y", "A"), "min"), b=(("y", "A"), "min")) def myfunc(s):