pandas-dev · jreback · Oct 26, 2020 · Oct 25, 2020 · Oct 25, 2020 · Oct 25, 2020
diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py
@@ -91,6 +91,22 @@ def test_setitem_tuple(self):
         cat[1] = cat[0]
         assert cat[1] == (0, 1)
 
+    def test_setitem_listlike(self):
+        # GH#9469: setting through an int64 ndarray indexer must coerce the
+        # indexer properly; the Categorical here is built from int8 data
+        # (the seed is fixed so the generated data is reproducible).
+        np.random.seed(1)
+        c = Categorical(
+            np.random.randint(0, 5, size=150000).astype(np.int8)
+        ).add_categories([-1000])
+        indexer = np.array([100000]).astype(np.int64)
+        c[indexer] = -1000
+
+        # Assert on the underlying code rather than the value: -1000 was added
+        # after the existing categories, and the expected code for it is 5.
+        result = c.codes[np.array([100000]).astype(np.int64)]
+        tm.assert_numpy_array_equal(result, np.array([5], dtype="int8"))
+
 
 class TestCategoricalIndexing:
     def test_getitem_slice(self):

diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py
@@ -5,8 +5,18 @@
 import pytest
 
 import pandas as pd
-from pandas import Categorical, DataFrame, Index, MultiIndex, Series, date_range, isna
+from pandas import (
+    Categorical,
+    CategoricalIndex,
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    date_range,
+    isna,
+)
 import pandas._testing as tm
+from pandas.api.types import CategoricalDtype as CDT
 import pandas.core.common as com
 
 
@@ -745,3 +755,94 @@ def test_reindex_multi_categorical_time(self):
         result = df2.reindex(midx)
         expected = DataFrame({"a": [0, 1, 2, 3, 4, 5, 6, np.nan, 8]}, index=midx)
         tm.assert_frame_equal(result, expected)
+
+    def test_reindex_with_categoricalindex(self):
+        df = DataFrame(
+            {
+                "A": np.arange(3, dtype="int64"),
+            },
+            index=CategoricalIndex(list("abc"), dtype=CDT(list("cabe")), name="B"),
+        )
+
+        # reindexing with a plain list of labels is expected to give back a
+        # regular (non-categorical) index; the index type is checked strictly
+        result = df.reindex(["a", "b", "e"])
+        expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index(
+            "B"
+        )
+        tm.assert_frame_equal(result, expected, check_index_type=True)
+
+        result = df.reindex(["a", "b"])
+        expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B")
+        tm.assert_frame_equal(result, expected, check_index_type=True)
+
+        result = df.reindex(["e"])
+        expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
+        tm.assert_frame_equal(result, expected, check_index_type=True)
+        # "d" is not among the index's categories at all
+        result = df.reindex(["d"])
+        expected = DataFrame({"A": [np.nan], "B": Series(["d"])}).set_index("B")
+        tm.assert_frame_equal(result, expected, check_index_type=True)
+
+        # when the indexer itself is a Categorical, the resulting index is
+        # expected to be categorical as well
+        cats = list("cabe")
+
+        result = df.reindex(Categorical(["a", "e"], categories=cats))
+        expected = DataFrame(
+            {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats))}
+        ).set_index("B")
+        tm.assert_frame_equal(result, expected, check_index_type=True)
+
+        result = df.reindex(Categorical(["a"], categories=cats))
+        expected = DataFrame(
+            {"A": [0], "B": Series(list("a")).astype(CDT(cats))}
+        ).set_index("B")
+        tm.assert_frame_equal(result, expected, check_index_type=True)
+        # NOTE(review): the next three checks repeat the list-like cases above
+        result = df.reindex(["a", "b", "e"])
+        expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index(
+            "B"
+        )
+        tm.assert_frame_equal(result, expected, check_index_type=True)
+
+        result = df.reindex(["a", "b"])
+        expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B")
+        tm.assert_frame_equal(result, expected, check_index_type=True)
+
+        result = df.reindex(["e"])
+        expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
+        tm.assert_frame_equal(result, expected, check_index_type=True)
+
+        # the result index takes ordered/categories from the passed Categorical
+        result = df.reindex(Categorical(["a", "e"], categories=cats, ordered=True))
+        expected = DataFrame(
+            {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats, ordered=True))}
+        ).set_index("B")
+        tm.assert_frame_equal(result, expected, check_index_type=True)
+
+        result = df.reindex(Categorical(["a", "d"], categories=["a", "d"]))
+        expected = DataFrame(
+            {"A": [0, np.nan], "B": Series(list("ad")).astype(CDT(["a", "d"]))}
+        ).set_index("B")
+        tm.assert_frame_equal(result, expected, check_index_type=True)
+
+        df2 = DataFrame(
+            {
+                "A": np.arange(6, dtype="int64"),
+            },
+            index=CategoricalIndex(list("aabbca"), dtype=CDT(list("cabe")), name="B"),
+        )
+        # reindexing a frame whose own index contains duplicates is not allowed
+        msg = "cannot reindex from a duplicate axis"
+        with pytest.raises(ValueError, match=msg):
+            df2.reindex(["a", "b"])
+
+        # method, level and limit are not implemented for CategoricalIndex.reindex
+        msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
+        with pytest.raises(NotImplementedError, match=msg.format("method")):
+            df.reindex(["a"], method="ffill")
+        with pytest.raises(NotImplementedError, match=msg.format("level")):
+            df.reindex(["a"], level=1)
+        with pytest.raises(NotImplementedError, match=msg.format("limit")):
+            df.reindex(["a"], limit=2)
diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py
@@ -4,6 +4,7 @@
 import pandas as pd
 from pandas import (
     CategoricalDtype,
+    CategoricalIndex,
     DataFrame,
     Index,
     IntervalIndex,
@@ -495,7 +496,7 @@ def test_sort_index_categorical_multiindex(self):
             columns=["a"],
             index=MultiIndex(
                 levels=[
-                    pd.CategoricalIndex(
+                    CategoricalIndex(
                         ["c", "a", "b"],
                         categories=["c", "a", "b"],
                         ordered=True,
@@ -736,6 +737,34 @@ def test_sort_index_multilevel_repr_8017(self, gen, extra):
         result = result.sort_index(axis=1)
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize(
+        "categories",
+        [
+            pytest.param(["a", "b", "c"], id="str"),
+            pytest.param(
+                [pd.Interval(0, 1), pd.Interval(1, 2), pd.Interval(2, 3)],
+                id="pd.Interval",
+            ),
+        ],
+    )
+    def test_sort_index_with_categories(self, categories):
+        # GH#23452: sort_index should order rows by the (reversed) category order
+        df = DataFrame(
+            {"foo": range(len(categories))},
+            index=CategoricalIndex(
+                data=categories, categories=categories, ordered=True
+            ),
+        )
+        df.index = df.index.reorder_categories(df.index.categories[::-1])
+        result = df.sort_index()
+        expected = DataFrame(
+            {"foo": reversed(range(len(categories)))},
+            index=CategoricalIndex(
+                data=categories[::-1], categories=categories[::-1], ordered=True
+            ),
+        )
+        tm.assert_frame_equal(result, expected)
+
 
 class TestDataFrameSortIndexKey:
     def test_sort_multi_index_key(self):

diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+from pandas.errors import PerformanceWarning
+
 import pandas as pd
 from pandas import Categorical, DataFrame, NaT, Timestamp, date_range
 import pandas._testing as tm
@@ -711,3 +713,90 @@ def sorter(key):
         )
 
         tm.assert_frame_equal(result, expected)
+
+
+@pytest.fixture
+def df_none():  # frame with no index passed; all four labels are columns
+    return DataFrame(
+        {
+            "outer": ["a", "a", "a", "b", "b", "b"],
+            "inner": [1, 2, 2, 2, 1, 1],
+            "A": np.arange(6, 0, -1),
+            ("B", 5): ["one", "one", "two", "two", "one", "one"],  # tuple label
+        }
+    )
+
+
+@pytest.fixture(params=[["outer"], ["outer", "inner"]])
+def df_idx(request, df_none):
+    levels = request.param  # column labels to move into the index
+    return df_none.set_index(levels)  # one- or two-level index variant
+
+
+@pytest.fixture(
+    params=[
+        "inner",  # index level
+        ["outer"],  # list of index level
+        "A",  # column
+        [("B", 5)],  # list of column
+        ["inner", "outer"],  # two index levels
+        [("B", 5), "outer"],  # index level and column
+        ["A", ("B", 5)],  # Two columns
+        ["inner", "outer"],  # NOTE(review): repeats "two index levels"; column missing?
+    ]
+)
+def sort_names(request):
+    return request.param
+
+
+@pytest.fixture(params=[True, False])
+def ascending(request):
+    return request.param  # forwarded as sort_values(ascending=...)
+
+
+class TestSortValuesLevelAsStr:
+    def test_sort_index_level_and_column_label(
+        self, df_none, df_idx, sort_names, ascending
+    ):
+        # GH#14353: sort_values(by=...) given index level names and/or columns
+
+        # Get index levels from df_idx
+        levels = df_idx.index.names
+
+        # Compute expected by sorting on columns and then setting the index
+        expected = df_none.sort_values(
+            by=sort_names, ascending=ascending, axis=0
+        ).set_index(levels)
+
+        # Compute result by sorting on a mix of columns and index levels
+        result = df_idx.sort_values(by=sort_names, ascending=ascending, axis=0)
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_sort_column_level_and_index_label(
+        self, df_none, df_idx, sort_names, ascending
+    ):
+        # GH#14353: same check along axis=1, on the transposed frame
+
+        # Get levels from df_idx
+        levels = df_idx.index.names
+
+        # Compute expected by sorting on axis=0, setting index levels, and then
+        # transposing. For some cases this will result in a frame with
+        # multiple column levels
+        expected = (
+            df_none.sort_values(by=sort_names, ascending=ascending, axis=0)
+            .set_index(levels)
+            .T
+        )
+
+        # Compute result by transposing and sorting on axis=1.
+        result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1)
+
+        if len(levels) > 1:
+            # Accessing multi-level columns that are not lexsorted raises a
+            # performance warning
+            with tm.assert_produces_warning(PerformanceWarning, check_stacklevel=False):
+                tm.assert_frame_equal(result, expected)
+        else:
+            tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -2699,6 +2699,13 @@ def test_frame_ctor_datetime64_column(self):
 
 
 class TestDataFrameConstructorWithDatetimeTZ:
+    def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture):
+        # GH#25843: tz-aware data + naive dtype is expected to give naive values
+        tz = tz_aware_fixture
+        result = DataFrame({"d": [Timestamp("2019", tz=tz)]}, dtype="datetime64[ns]")
+        expected = DataFrame({"d": [Timestamp("2019")]})
+        tm.assert_frame_equal(result, expected)
+
     def test_from_dict(self):
 
         # 8260

diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py
@@ -4,7 +4,7 @@
 import pytest
 
 import pandas as pd
-from pandas import DataFrame, Index, MultiIndex, period_range
+from pandas import DataFrame, Index, MultiIndex, date_range, period_range
 import pandas._testing as tm
 
 
@@ -341,3 +341,24 @@ def test_merge_join_different_levels(self):
         with tm.assert_produces_warning(UserWarning):
             result = df1.join(df2, on="a")
         tm.assert_frame_equal(result, expected)
+
+    def test_frame_join_tzaware(self):
+        test1 = DataFrame(
+            np.zeros((6, 3)),
+            index=date_range(
+                "2012-11-15 00:00:00", periods=6, freq="100L", tz="US/Central"
+            ),
+        )
+        test2 = DataFrame(
+            np.zeros((3, 3)),
+            index=date_range(
+                "2012-11-15 00:00:00", periods=3, freq="250L", tz="US/Central"
+            ),
+            columns=range(3, 6),
+        )
+        # outer join should union the tz-aware indexes and keep the timezone
+        result = test1.join(test2, how="outer")
+        expected = test1.index.union(test2.index)
+
+        tm.assert_index_equal(result.index, expected)
+        assert result.index.tz.zone == "US/Central"