From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001
From: Kaiqi Dong <kaiqi@kth.se>
Date: Mon, 3 Dec 2018 17:43:52 +0100
Subject: [PATCH 1/5] remove \n from docstring

---
 pandas/core/arrays/datetimes.py  | 26 +++++++++++++-------------
 pandas/core/arrays/timedeltas.py | 16 ++++++++--------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index cfe3afcf3730a..b3df505d56d78 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -82,7 +82,7 @@ def f(self):
         return result
 
     f.__name__ = name
-    f.__doc__ = docstring
+    f.__doc__ = "\n{}\n".format(docstring)
     return property(f)
 
 
@@ -1072,19 +1072,19 @@ def date(self):
 
         return tslib.ints_to_pydatetime(timestamps, box="date")
 
-    year = _field_accessor('year', 'Y', "\n The year of the datetime\n")
+    year = _field_accessor('year', 'Y', "The year of the datetime")
     month = _field_accessor('month', 'M',
-                            "\n The month as January=1, December=12 \n")
-    day = _field_accessor('day', 'D', "\nThe days of the datetime\n")
-    hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n")
-    minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n")
-    second = _field_accessor('second', 's', "\nThe seconds of the datetime\n")
+                            "The month as January=1, December=12")
+    day = _field_accessor('day', 'D', "The days of the datetime")
+    hour = _field_accessor('hour', 'h', "The hours of the datetime")
+    minute = _field_accessor('minute', 'm', "The minutes of the datetime")
+    second = _field_accessor('second', 's', "The seconds of the datetime")
     microsecond = _field_accessor('microsecond', 'us',
-                                  "\nThe microseconds of the datetime\n")
+                                  "The microseconds of the datetime")
     nanosecond = _field_accessor('nanosecond', 'ns',
-                                 "\nThe nanoseconds of the datetime\n")
+                                 "The nanoseconds of the datetime")
     weekofyear = _field_accessor('weekofyear', 'woy',
-                                 "\nThe week ordinal of the year\n")
+                                 "The week ordinal of the year")
     week = weekofyear
     _dayofweek_doc = """
     The day of the week with Monday=0, Sunday=6.
@@ -1129,12 +1129,12 @@ def date(self):
         "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0")
 
     dayofyear = _field_accessor('dayofyear', 'doy',
-                                "\nThe ordinal day of the year\n")
-    quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n")
+                                "The ordinal day of the year")
+    quarter = _field_accessor('quarter', 'q', "The quarter of the date")
     days_in_month = _field_accessor(
         'days_in_month',
         'dim',
-        "\nThe number of days in the month\n")
+        "The number of days in the month")
     daysinmonth = days_in_month
     _is_month_doc = """
         Indicates whether the date is the {first_or_last} day of the month.
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 830283d31a929..4afc9f5483c2a 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -59,7 +59,7 @@ def f(self):
         return result
 
     f.__name__ = name
-    f.__doc__ = docstring
+    f.__doc__ = "\n{}\n".format(docstring)
     return property(f)
 
 
@@ -684,16 +684,16 @@ def to_pytimedelta(self):
         return tslibs.ints_to_pytimedelta(self.asi8)
 
     days = _field_accessor("days", "days",
-                           "\nNumber of days for each element.\n")
+                           "Number of days for each element.")
     seconds = _field_accessor("seconds", "seconds",
-                              "\nNumber of seconds (>= 0 and less than 1 day) "
-                              "for each element.\n")
+                              "Number of seconds (>= 0 and less than 1 day) "
+                              "for each element.")
     microseconds = _field_accessor("microseconds", "microseconds",
-                                   "\nNumber of microseconds (>= 0 and less "
-                                   "than 1 second) for each element.\n")
+                                   "Number of microseconds (>= 0 and less "
+                                   "than 1 second) for each element.")
     nanoseconds = _field_accessor("nanoseconds", "nanoseconds",
-                                  "\nNumber of nanoseconds (>= 0 and less "
-                                  "than 1 microsecond) for each element.\n")
+                                  "Number of nanoseconds (>= 0 and less "
+                                  "than 1 microsecond) for each element.")
 
     @property
     def components(self):

From a73fda68cccc8e24989f7e7c1eaf13c3706972aa Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 9 Nov 2019 09:37:43 +0100
Subject: [PATCH 2/5] Add tests for named tuples in MultiIndex columns cases

---
 .../tests/groupby/aggregate/test_aggregate.py | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index c03ffe317083c..99570f39604e0 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -494,6 +494,68 @@ def test_mangled(self):
         )
         tm.assert_frame_equal(result, expected)
 
+    def test_agg_relabel_multiindex_column(self):
+        # GH 29422, add tests for multiindex column cases
+        df = pd.DataFrame(
+            {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
+        )
+        df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
+        idx = pd.Index(["a", "b"], name=("x", "group"))
+
+        result = df.groupby(("x", "group")).agg(a_max=(("y", "A"), "max"))
+        expected = pd.DataFrame({"a_max": [1, 3]}, index=idx)
+        tm.assert_frame_equal(result, expected)
+
+        # multiple columns, and different agg methods
+        result = df.groupby(("x", "group")).agg(
+            a_max=(("y", "A"), "max"),
+            a_min=(("y", "A"), np.min),
+            b_mean=(("y", "B"), "mean"),
+        )
+        expected = pd.DataFrame(
+            {"a_max": [1, 3], "a_min": [0, 2], "b_mean": [6, 7]}, index=idx
+        )
+        tm.assert_frame_equal(result, expected)
+
+        # multiple colums, with lamdba being used
+        result = df.groupby(("x", "group")).agg(
+            a_max=(("y", "A"), lambda x: max(x)),
+            a_const=(("y", "A"), lambda x: 1),
+            b_mean=(("y", "B"), "mean"),
+        )
+        expected = pd.DataFrame(
+            {"a_max": [1, 3], "a_const": [1, 1], "b_mean": [6, 7]}, index=idx
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_agg_relabel_multiindex_raises(self):
+        # GH 29422, add tests for raises senarios in multiindex column cases
+        df = pd.DataFrame(
+            {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
+        )
+        df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
+
+        with pytest.raises(KeyError, match="does not exist"):
+            df.groupby(("x", "group")).agg(a=(("Y", "a"), "max"))
+
+        with pytest.raises(SpecificationError, match="Function names"):
+            df.groupby(("x", "group")).agg(a=(("y", "A"), "min"), b=(("y", "A"), "min"))
+
+    def test_namedagg_multiindex_column(self):
+        # GH 29422, add tests for namedagg in multiindex column cases
+        df = pd.DataFrame(
+            {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
+        )
+        df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
+        idx = pd.Index(["a", "b"], name=("x", "group"))
+
+        result = df.groupby(("x", "group")).agg(
+            a_max=pd.NamedAgg(("y", "A"), "max"),
+            b_mean=pd.NamedAgg(("y", "B"), np.mean),
+        )
+        expected = pd.DataFrame({"a_max": [1, 3], "b_mean": [6, 7]}, index=idx)
+        tm.assert_frame_equal(result, expected)
+
 
 def myfunc(s):
     return np.percentile(s, q=0.90)

From 8b2c654909bd4c520d408eb051564afcfec0f0c4 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 9 Nov 2019 09:43:31 +0100
Subject: [PATCH 3/5] fix expected mean values in multiindex agg tests

---
 pandas/tests/groupby/aggregate/test_aggregate.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 99570f39604e0..5701ea7e60398 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -513,7 +513,7 @@ def test_agg_relabel_multiindex_column(self):
             b_mean=(("y", "B"), "mean"),
         )
         expected = pd.DataFrame(
-            {"a_max": [1, 3], "a_min": [0, 2], "b_mean": [6, 7]}, index=idx
+            {"a_max": [1, 3], "a_min": [0, 2], "b_mean": [5.5, 7.5]}, index=idx
         )
         tm.assert_frame_equal(result, expected)
 
@@ -524,7 +524,7 @@ def test_agg_relabel_multiindex_column(self):
             b_mean=(("y", "B"), "mean"),
         )
         expected = pd.DataFrame(
-            {"a_max": [1, 3], "a_const": [1, 1], "b_mean": [6, 7]}, index=idx
+            {"a_max": [1, 3], "a_const": [1, 1], "b_mean": [5.5, 7.5]}, index=idx
         )
         tm.assert_frame_equal(result, expected)
 
@@ -553,7 +553,7 @@ def test_namedagg_multiindex_column(self):
             a_max=pd.NamedAgg(("y", "A"), "max"),
             b_mean=pd.NamedAgg(("y", "B"), np.mean),
         )
-        expected = pd.DataFrame({"a_max": [1, 3], "b_mean": [6, 7]}, index=idx)
+        expected = pd.DataFrame({"a_max": [1, 3], "b_mean": [5.5, 7.5]}, index=idx)
         tm.assert_frame_equal(result, expected)
 
 

From 8ed17ac1e6aa544a2dc7fe1f612a74708826026e Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Sat, 9 Nov 2019 13:53:03 +0100
Subject: [PATCH 4/5] code change based on review

---
 .../tests/groupby/aggregate/test_aggregate.py | 108 +++++++++---------
 1 file changed, 55 insertions(+), 53 deletions(-)

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 5701ea7e60398..2f51a31579562 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -494,67 +494,69 @@ def test_mangled(self):
         )
         tm.assert_frame_equal(result, expected)
 
-    def test_agg_relabel_multiindex_column(self):
-        # GH 29422, add tests for multiindex column cases
-        df = pd.DataFrame(
-            {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
-        )
-        df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
-        idx = pd.Index(["a", "b"], name=("x", "group"))
 
-        result = df.groupby(("x", "group")).agg(a_max=(("y", "A"), "max"))
-        expected = pd.DataFrame({"a_max": [1, 3]}, index=idx)
-        tm.assert_frame_equal(result, expected)
+def test_agg_relabel_multiindex_column():
+    # GH 29422, add tests for multiindex column cases
+    df = DataFrame(
+        {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
+    )
+    df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
+    idx = pd.Index(["a", "b"], name=("x", "group"))
 
-        # multiple columns, and different agg methods
-        result = df.groupby(("x", "group")).agg(
-            a_max=(("y", "A"), "max"),
-            a_min=(("y", "A"), np.min),
-            b_mean=(("y", "B"), "mean"),
-        )
-        expected = pd.DataFrame(
-            {"a_max": [1, 3], "a_min": [0, 2], "b_mean": [5.5, 7.5]}, index=idx
-        )
-        tm.assert_frame_equal(result, expected)
+    result = df.groupby(("x", "group")).agg(a_max=(("y", "A"), "max"))
+    expected = DataFrame({"a_max": [1, 3]}, index=idx)
+    tm.assert_frame_equal(result, expected)
 
-        # multiple colums, with lamdba being used
-        result = df.groupby(("x", "group")).agg(
-            a_max=(("y", "A"), lambda x: max(x)),
-            a_const=(("y", "A"), lambda x: 1),
-            b_mean=(("y", "B"), "mean"),
-        )
-        expected = pd.DataFrame(
-            {"a_max": [1, 3], "a_const": [1, 1], "b_mean": [5.5, 7.5]}, index=idx
-        )
-        tm.assert_frame_equal(result, expected)
+    # multiple columns, and different agg methods
+    result = df.groupby(("x", "group")).agg(
+        a_max=(("y", "A"), "max"),
+        a_min=(("y", "A"), np.min),
+        b_mean=(("y", "B"), "mean"),
+    )
+    expected = DataFrame(
+        {"a_max": [1, 3], "a_min": [0, 2], "b_mean": [5.5, 7.5]}, index=idx
+    )
+    tm.assert_frame_equal(result, expected)
 
-    def test_agg_relabel_multiindex_raises(self):
-        # GH 29422, add tests for raises senarios in multiindex column cases
-        df = pd.DataFrame(
-            {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
-        )
-        df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
+    # multiple colums, with lamdba being used
+    result = df.groupby(("x", "group")).agg(
+        a_max=(("y", "A"), lambda x: max(x)),
+        a_const=(("y", "A"), lambda x: 1),
+        b_mean=(("y", "B"), "mean"),
+    )
+    expected = DataFrame(
+        {"a_max": [1, 3], "a_const": [1, 1], "b_mean": [5.5, 7.5]}, index=idx
+    )
+    tm.assert_frame_equal(result, expected)
 
-        with pytest.raises(KeyError, match="does not exist"):
-            df.groupby(("x", "group")).agg(a=(("Y", "a"), "max"))
 
-        with pytest.raises(SpecificationError, match="Function names"):
-            df.groupby(("x", "group")).agg(a=(("y", "A"), "min"), b=(("y", "A"), "min"))
+def test_agg_relabel_multiindex_raises():
+    # GH 29422, add tests for raises senarios in multiindex column cases
+    df = DataFrame(
+        {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
+    )
+    df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
 
-    def test_namedagg_multiindex_column(self):
-        # GH 29422, add tests for namedagg in multiindex column cases
-        df = pd.DataFrame(
-            {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
-        )
-        df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
-        idx = pd.Index(["a", "b"], name=("x", "group"))
+    with pytest.raises(KeyError, match="does not exist"):
+        df.groupby(("x", "group")).agg(a=(("Y", "a"), "max"))
 
-        result = df.groupby(("x", "group")).agg(
-            a_max=pd.NamedAgg(("y", "A"), "max"),
-            b_mean=pd.NamedAgg(("y", "B"), np.mean),
-        )
-        expected = pd.DataFrame({"a_max": [1, 3], "b_mean": [5.5, 7.5]}, index=idx)
-        tm.assert_frame_equal(result, expected)
+    with pytest.raises(SpecificationError, match="Function names"):
+        df.groupby(("x", "group")).agg(a=(("y", "A"), "min"), b=(("y", "A"), "min"))
+
+
+def test_named_agg_multiindex_column():
+    # GH 29422, add tests for namedagg in multiindex column cases
+    df = DataFrame(
+        {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
+    )
+    df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
+    idx = pd.Index(["a", "b"], name=("x", "group"))
+
+    result = df.groupby(("x", "group")).agg(
+        a_max=pd.NamedAgg(("y", "A"), "max"), b_mean=pd.NamedAgg(("y", "B"), np.mean)
+    )
+    expected = DataFrame({"a_max": [1, 3], "b_mean": [5.5, 7.5]}, index=idx)
+    tm.assert_frame_equal(result, expected)
 
 
 def myfunc(s):

From 7e138de68c2b608b17c225c4c4eabd4001856093 Mon Sep 17 00:00:00 2001
From: Kaiqi <kaiqi@kth.se>
Date: Mon, 11 Nov 2019 20:49:46 +0100
Subject: [PATCH 5/5] code change based on review

---
 .../tests/groupby/aggregate/test_aggregate.py | 70 +++++++++++--------
 1 file changed, 40 insertions(+), 30 deletions(-)

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 2f51a31579562..4313b52798c6e 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -495,7 +495,38 @@ def test_mangled(self):
         tm.assert_frame_equal(result, expected)
 
 
-def test_agg_relabel_multiindex_column():
+@pytest.mark.parametrize(
+    "agg_col1, agg_col2, agg_col3, agg_result1, agg_result2, agg_result3",
+    [
+        (
+            (("y", "A"), "max"),
+            (("y", "A"), np.min),
+            (("y", "B"), "mean"),
+            [1, 3],
+            [0, 2],
+            [5.5, 7.5],
+        ),
+        (
+            (("y", "A"), lambda x: max(x)),
+            (("y", "A"), lambda x: 1),
+            (("y", "B"), "mean"),
+            [1, 3],
+            [1, 1],
+            [5.5, 7.5],
+        ),
+        (
+            pd.NamedAgg(("y", "A"), "max"),
+            pd.NamedAgg(("y", "B"), np.mean),
+            pd.NamedAgg(("y", "A"), lambda x: 1),
+            [1, 3],
+            [5.5, 7.5],
+            [1, 1],
+        ),
+    ],
+)
+def test_agg_relabel_multiindex_column(
+    agg_col1, agg_col2, agg_col3, agg_result1, agg_result2, agg_result3
+):
     # GH 29422, add tests for multiindex column cases
     df = DataFrame(
         {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
@@ -507,31 +538,17 @@ def test_agg_relabel_multiindex_column():
     expected = DataFrame({"a_max": [1, 3]}, index=idx)
     tm.assert_frame_equal(result, expected)
 
-    # multiple columns, and different agg methods
-    result = df.groupby(("x", "group")).agg(
-        a_max=(("y", "A"), "max"),
-        a_min=(("y", "A"), np.min),
-        b_mean=(("y", "B"), "mean"),
-    )
-    expected = DataFrame(
-        {"a_max": [1, 3], "a_min": [0, 2], "b_mean": [5.5, 7.5]}, index=idx
-    )
-    tm.assert_frame_equal(result, expected)
-
-    # multiple colums, with lamdba being used
     result = df.groupby(("x", "group")).agg(
-        a_max=(("y", "A"), lambda x: max(x)),
-        a_const=(("y", "A"), lambda x: 1),
-        b_mean=(("y", "B"), "mean"),
+        col_1=agg_col1, col_2=agg_col2, col_3=agg_col3
     )
     expected = DataFrame(
-        {"a_max": [1, 3], "a_const": [1, 1], "b_mean": [5.5, 7.5]}, index=idx
+        {"col_1": agg_result1, "col_2": agg_result2, "col_3": agg_result3}, index=idx
     )
     tm.assert_frame_equal(result, expected)
 
 
-def test_agg_relabel_multiindex_raises():
-    # GH 29422, add tests for raises senarios in multiindex column cases
+def test_agg_relabel_multiindex_raises_not_exist():
+    # GH 29422, add test for raises scenario when aggregate column does not exist
     df = DataFrame(
         {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
     )
@@ -540,23 +557,16 @@ def test_agg_relabel_multiindex_raises():
     with pytest.raises(KeyError, match="does not exist"):
         df.groupby(("x", "group")).agg(a=(("Y", "a"), "max"))
 
-    with pytest.raises(SpecificationError, match="Function names"):
-        df.groupby(("x", "group")).agg(a=(("y", "A"), "min"), b=(("y", "A"), "min"))
 
-
-def test_named_agg_multiindex_column():
-    # GH 29422, add tests for namedagg in multiindex column cases
+def test_agg_relabel_multiindex_raises_duplicate():
+    # GH 29422, add test for raises scenario when getting duplicates
     df = DataFrame(
         {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
     )
     df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
-    idx = pd.Index(["a", "b"], name=("x", "group"))
 
-    result = df.groupby(("x", "group")).agg(
-        a_max=pd.NamedAgg(("y", "A"), "max"), b_mean=pd.NamedAgg(("y", "B"), np.mean)
-    )
-    expected = DataFrame({"a_max": [1, 3], "b_mean": [5.5, 7.5]}, index=idx)
-    tm.assert_frame_equal(result, expected)
+    with pytest.raises(SpecificationError, match="Function names"):
+        df.groupby(("x", "group")).agg(a=(("y", "A"), "min"), b=(("y", "A"), "min"))
 
 
 def myfunc(s):