diff --git a/pandas/conftest.py b/pandas/conftest.py index 5a4bc397ab792..515d20e8c5781 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -34,7 +34,7 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame +from pandas import DataFrame, Series import pandas._testing as tm from pandas.core import ops from pandas.core.indexes.api import Index, MultiIndex @@ -529,6 +529,23 @@ def series_with_simple_index(index): return _create_series(index) +@pytest.fixture +def series_with_multilevel_index(): + """ + Fixture with a Series with a 2-level MultiIndex. + """ + arrays = [ + ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = zip(*arrays) + index = MultiIndex.from_tuples(tuples) + data = np.random.randn(8) + ser = Series(data, index=index) + ser[3] = np.NaN + return ser + + _narrow_dtypes = [ np.float16, np.float32, diff --git a/pandas/tests/frame/methods/test_count.py b/pandas/tests/frame/methods/test_count.py index 6d4ce3fa0dd4e..d738c7139093c 100644 --- a/pandas/tests/frame/methods/test_count.py +++ b/pandas/tests/frame/methods/test_count.py @@ -6,6 +6,24 @@ class TestDataFrameCount: + def test_count_multiindex(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + frame = frame.copy() + frame.index.names = ["a", "b"] + + result = frame.count(level="b") + expected = frame.count(level=1) + tm.assert_frame_equal(result, expected, check_names=False) + + result = frame.count(level="a") + expected = frame.count(level=0) + tm.assert_frame_equal(result, expected, check_names=False) + + msg = "Level x not found" + with pytest.raises(KeyError, match=msg): + frame.count(level="x") + def test_count(self): # corner case frame = DataFrame() diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index b88ef0e6691cb..ca4eeaf9ac807 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -1,8 +1,11 @@ from datetime import datetime +from itertools import product import numpy as np import pytest +from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype + import pandas as pd from pandas import ( DataFrame, @@ -301,6 +304,194 @@ def test_reset_index_range(self): ) tm.assert_frame_equal(result, expected) + def test_reset_index_multiindex_columns(self): + levels = [["A", ""], ["B", "b"]] + df = DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels)) + result = df[["B"]].rename_axis("A").reset_index() + tm.assert_frame_equal(result, df) + + # GH#16120: already existing column + msg = r"cannot insert \('A', ''\), already exists" + with pytest.raises(ValueError, match=msg): + df.rename_axis("A").reset_index() + + # GH#16164: multiindex (tuple) full key + result = df.set_index([("A", "")]).reset_index() + tm.assert_frame_equal(result, df) + + # with additional (unnamed) index level + idx_col = DataFrame( + [[0], [1]], columns=MultiIndex.from_tuples([("level_0", "")]) + ) + expected = pd.concat([idx_col, df[[("B", "b"), ("A", "")]]], axis=1) + result = df.set_index([("B", "b")], append=True).reset_index() + tm.assert_frame_equal(result, expected) + + # with index name which is a too long tuple... + msg = "Item must have length equal to number of levels." + with pytest.raises(ValueError, match=msg): + df.rename_axis([("C", "c", "i")]).reset_index() + + # or too short... + levels = [["A", "a", ""], ["B", "b", "i"]] + df2 = DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels)) + idx_col = DataFrame( + [[0], [1]], columns=MultiIndex.from_tuples([("C", "c", "ii")]) + ) + expected = pd.concat([idx_col, df2], axis=1) + result = df2.rename_axis([("C", "c")]).reset_index(col_fill="ii") + tm.assert_frame_equal(result, expected) + + # ... which is incompatible with col_fill=None + with pytest.raises( + ValueError, + match=( + "col_fill=None is incompatible with " + r"incomplete column name \('C', 'c'\)" + ), + ): + df2.rename_axis([("C", "c")]).reset_index(col_fill=None) + + # with col_level != 0 + result = df2.rename_axis([("c", "ii")]).reset_index(col_level=1, col_fill="C") + tm.assert_frame_equal(result, expected) + + def test_reset_index_datetime(self, tz_naive_fixture): + # GH#3950 + tz = tz_naive_fixture + idx1 = pd.date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") + idx2 = Index(range(5), name="idx2", dtype="int64") + idx = MultiIndex.from_arrays([idx1, idx2]) + df = DataFrame( + {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, + index=idx, + ) + + expected = DataFrame( + { + "idx1": [ + datetime(2011, 1, 1), + datetime(2011, 1, 2), + datetime(2011, 1, 3), + datetime(2011, 1, 4), + datetime(2011, 1, 5), + ], + "idx2": np.arange(5, dtype="int64"), + "a": np.arange(5, dtype="int64"), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx1", "idx2", "a", "b"], + ) + expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) + + tm.assert_frame_equal(df.reset_index(), expected) + + idx3 = pd.date_range( + "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" + ) + idx = MultiIndex.from_arrays([idx1, idx2, idx3]) + df = DataFrame( + {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, + index=idx, + ) + + expected = DataFrame( + { + "idx1": [ + datetime(2011, 1, 1), + datetime(2011, 1, 2), + datetime(2011, 1, 3), + datetime(2011, 1, 4), + datetime(2011, 1, 5), + ], + "idx2": np.arange(5, dtype="int64"), + "idx3": [ + datetime(2012, 1, 1), + datetime(2012, 2, 1), + datetime(2012, 3, 1), + datetime(2012, 4, 1), + datetime(2012, 5, 1), + ], + "a": np.arange(5, dtype="int64"), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx1", "idx2", "idx3", "a", "b"], + ) + expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) + expected["idx3"] = expected["idx3"].apply( + lambda d: Timestamp(d, tz="Europe/Paris") + ) + tm.assert_frame_equal(df.reset_index(), expected) + + # GH#7793 + idx = MultiIndex.from_product( + [["a", "b"], pd.date_range("20130101", periods=3, tz=tz)] + ) + df = DataFrame( + np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx + ) + + expected = DataFrame( + { + "level_0": "a a a b b b".split(), + "level_1": [ + datetime(2013, 1, 1), + datetime(2013, 1, 2), + datetime(2013, 1, 3), + ] + * 2, + "a": np.arange(6, dtype="int64"), + }, + columns=["level_0", "level_1", "a"], + ) + expected["level_1"] = expected["level_1"].apply( + lambda d: Timestamp(d, freq="D", tz=tz) + ) + result = df.reset_index() + tm.assert_frame_equal(result, expected) + + def test_reset_index_period(self): + # GH#7746 + idx = MultiIndex.from_product( + [pd.period_range("20130101", periods=3, freq="M"), list("abc")], + names=["month", "feature"], + ) + + df = DataFrame( + np.arange(9, dtype="int64").reshape(-1, 1), index=idx, columns=["a"] + ) + expected = DataFrame( + { + "month": ( + [pd.Period("2013-01", freq="M")] * 3 + + [pd.Period("2013-02", freq="M")] * 3 + + [pd.Period("2013-03", freq="M")] * 3 + ), + "feature": ["a", "b", "c"] * 3, + "a": np.arange(9, dtype="int64"), + }, + columns=["month", "feature", "a"], + ) + result = df.reset_index() + tm.assert_frame_equal(result, expected) + + def test_reset_index_delevel_infer_dtype(self): + tuples = list(product(["foo", "bar"], [10, 20], [1.0, 1.1])) + index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"]) + df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"], index=index) + deleveled = df.reset_index() + assert is_integer_dtype(deleveled["prm1"]) + assert is_float_dtype(deleveled["prm2"]) + + def test_reset_index_with_drop( + self, multiindex_year_month_day_dataframe_random_data + ): + ymd = multiindex_year_month_day_dataframe_random_data + + deleveled = ymd.reset_index(drop=True) + assert len(deleveled.columns) == len(ymd.columns) + assert deleveled.index.name == ymd.index.name + @pytest.mark.parametrize( "array, dtype", diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index 8927ab7c5ef79..29cd3c2d535d9 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -17,6 +17,17 @@ class TestSetIndex: + def test_set_index_multiindex(self): + # segfault in GH#3308 + d = {"t1": [2, 2.5, 3], "t2": [4, 5, 6]} + df = DataFrame(d) + tuples = [(0, 1), (0, 2), (1, 2)] + df["tuples"] = tuples + + index = MultiIndex.from_tuples(df["tuples"]) + # it works! + df.set_index(index) + def test_set_index_empty_column(self): # GH#1971 df = DataFrame( diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index 58260f3613b5e..50d643cf83d7f 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -2,7 +2,15 @@ import pytest import pandas as pd -from pandas import CategoricalDtype, DataFrame, Index, IntervalIndex, MultiIndex, Series +from pandas import ( + CategoricalDtype, + DataFrame, + Index, + IntervalIndex, + MultiIndex, + Series, + Timestamp, +) import pandas._testing as tm @@ -668,6 +676,66 @@ def test_sort_index_preserve_levels(self, multiindex_dataframe_random_data): result = frame.sort_index() assert result.index.names == frame.index.names + @pytest.mark.parametrize( + "gen,extra", + [ + ([1.0, 3.0, 2.0, 5.0], 4.0), + ([1, 3, 2, 5], 4), + ( + [ + Timestamp("20130101"), + Timestamp("20130103"), + Timestamp("20130102"), + Timestamp("20130105"), + ], + Timestamp("20130104"), + ), + (["1one", "3one", "2one", "5one"], "4one"), + ], + ) + def test_sort_index_multilevel_repr_8017(self, gen, extra): + + np.random.seed(0) + data = np.random.randn(3, 4) + + columns = MultiIndex.from_tuples([("red", i) for i in gen]) + df = DataFrame(data, index=list("def"), columns=columns) + df2 = pd.concat( + [ + df, + DataFrame( + "world", + index=list("def"), + columns=MultiIndex.from_tuples([("red", extra)]), + ), + ], + axis=1, + ) + + # check that the repr is good + # make sure that we have a correct sparsified repr + # e.g. only 1 header of read + assert str(df2).splitlines()[0].split() == ["red"] + + # GH 8017 + # sorting fails after columns added + + # construct single-dtype then sort + result = df.copy().sort_index(axis=1) + expected = df.iloc[:, [0, 2, 1, 3]] + tm.assert_frame_equal(result, expected) + + result = df2.sort_index(axis=1) + expected = df2.iloc[:, [0, 2, 1, 4, 3]] + tm.assert_frame_equal(result, expected) + + # setitem then sort + result = df.copy() + result[("red", extra)] = "world" + + result = result.sort_index(axis=1) + tm.assert_frame_equal(result, expected) + class TestDataFrameSortIndexKey: def test_sort_multi_index_key(self): diff --git a/pandas/tests/frame/methods/test_swaplevel.py b/pandas/tests/frame/methods/test_swaplevel.py new file mode 100644 index 0000000000000..5511ac7d6b1b2 --- /dev/null +++ b/pandas/tests/frame/methods/test_swaplevel.py @@ -0,0 +1,36 @@ +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +class TestSwaplevel: + def test_swaplevel(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + swapped = frame["A"].swaplevel() + swapped2 = frame["A"].swaplevel(0) + swapped3 = frame["A"].swaplevel(0, 1) + swapped4 = frame["A"].swaplevel("first", "second") + assert not swapped.index.equals(frame.index) + tm.assert_series_equal(swapped, swapped2) + tm.assert_series_equal(swapped, swapped3) + tm.assert_series_equal(swapped, swapped4) + + back = swapped.swaplevel() + back2 = swapped.swaplevel(0) + back3 = swapped.swaplevel(0, 1) + back4 = swapped.swaplevel("second", "first") + assert back.index.equals(frame.index) + tm.assert_series_equal(back, back2) + tm.assert_series_equal(back, back3) + tm.assert_series_equal(back, back4) + + ft = frame.T + swapped = ft.swaplevel("first", "second", axis=1) + exp = frame.swaplevel("first", "second").T + tm.assert_frame_equal(swapped, exp) + + msg = "Can only swap levels on a hierarchical axis." + with pytest.raises(TypeError, match=msg): + DataFrame(range(3)).swaplevel() diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index acc87defb568c..25795c242528c 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1561,6 +1561,19 @@ def test_constructor_from_dict_tuples(self, data_dict, keys, orient): tm.assert_index_equal(result, expected) + def test_frame_dict_constructor_empty_series(self): + s1 = Series( + [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)]) + ) + s2 = Series( + [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]) + ) + s3 = Series(dtype=object) + + # it works! + DataFrame({"foo": s1, "bar": s2, "baz": s3}) + DataFrame.from_dict({"foo": s1, "baz": s3, "bar": s2}) + def test_constructor_Series_named(self): a = Series([1, 2, 3], index=["a", "b", "c"], name="x") df = DataFrame(a) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 641331d73ff7a..16d223048e374 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -8,6 +8,7 @@ from pandas import ( Categorical, DataFrame, + MultiIndex, PeriodIndex, Series, date_range, @@ -20,6 +21,66 @@ class TestDataFrameReprInfoEtc: + def test_assign_index_sequences(self): + # GH#2200 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}).set_index( + ["a", "b"] + ) + index = list(df.index) + index[0] = ("faz", "boo") + df.index = index + repr(df) + + # this travels an improper code path + index[0] = ["faz", "boo"] + df.index = index + repr(df) + + def test_multiindex_na_repr(self): + # only an issue with long columns + df3 = DataFrame( + { + "A" * 30: {("A", "A0006000", "nuit"): "A0006000"}, + "B" * 30: {("A", "A0006000", "nuit"): np.nan}, + "C" * 30: {("A", "A0006000", "nuit"): np.nan}, + "D" * 30: {("A", "A0006000", "nuit"): np.nan}, + "E" * 30: {("A", "A0006000", "nuit"): "A"}, + "F" * 30: {("A", "A0006000", "nuit"): np.nan}, + } + ) + + idf = df3.set_index(["A" * 30, "C" * 30]) + repr(idf) + + def test_repr_name_coincide(self): + index = MultiIndex.from_tuples( + [("a", 0, "foo"), ("b", 1, "bar")], names=["a", "b", "c"] + ) + + df = DataFrame({"value": [0, 1]}, index=index) + + lines = repr(df).split("\n") + assert lines[2].startswith("a 0 foo") + + def test_repr_to_string( + self, + multiindex_year_month_day_dataframe_random_data, + multiindex_dataframe_random_data, + ): + ymd = multiindex_year_month_day_dataframe_random_data + frame = multiindex_dataframe_random_data + + repr(frame) + repr(ymd) + repr(frame.T) + repr(ymd.T) + + buf = StringIO() + frame.to_string(buf=buf) + ymd.to_string(buf=buf) + frame.T.to_string(buf=buf) + ymd.T.to_string(buf=buf) + def test_repr_empty(self): # empty repr(DataFrame()) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index b9132f429905d..6f79878fd3ab1 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -27,6 +27,15 @@ def test_get_level_number_integer(idx): idx._get_level_number("fourth") +def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + + with pytest.raises(IndexError, match="Too many levels"): + frame.index._get_level_number(2) + with pytest.raises(IndexError, match="not a valid level number"): + frame.index._get_level_number(-3) + + def test_set_name_methods(idx, index_names): # so long as these are synonyms, we don't need to test set_names assert idx.rename == idx.set_names diff --git a/pandas/tests/series/methods/test_count.py b/pandas/tests/series/methods/test_count.py index 3c5957706b144..df1babbee8e18 100644 --- a/pandas/tests/series/methods/test_count.py +++ b/pandas/tests/series/methods/test_count.py @@ -7,6 +7,24 @@ class TestSeriesCount: + def test_count_multiindex(self, series_with_multilevel_index): + ser = series_with_multilevel_index + + series = ser.copy() + series.index.names = ["a", "b"] + + result = series.count(level="b") + expect = ser.count(level=1).rename_axis("b") + tm.assert_series_equal(result, expect) + + result = series.count(level="a") + expect = ser.count(level=0).rename_axis("a") + tm.assert_series_equal(result, expect) + + msg = "Level x not found" + with pytest.raises(KeyError, match=msg): + series.count("x") + def test_count_level_without_multiindex(self): ser = Series(range(3)) diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py index 1474bb95f4af2..13d6a3b1447a1 100644 --- a/pandas/tests/series/methods/test_reset_index.py +++ b/pandas/tests/series/methods/test_reset_index.py @@ -111,6 +111,18 @@ def test_reset_index_drop_errors(self): with pytest.raises(KeyError, match="not found"): s.reset_index("wrong", drop=True) + def test_reset_index_with_drop(self, series_with_multilevel_index): + ser = series_with_multilevel_index + + deleveled = ser.reset_index() + assert isinstance(deleveled, DataFrame) + assert len(deleveled.columns) == len(ser.index.levels) + 1 + assert deleveled.index.name == ser.index.name + + deleveled = ser.reset_index(drop=True) + assert isinstance(deleveled, Series) + assert deleveled.index.name == ser.index.name + @pytest.mark.parametrize( "array, dtype", diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index f3d1f949c1475..8b8e49d914905 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1,15 +1,8 @@ -import datetime -from io import StringIO -from itertools import product - import numpy as np -from numpy.random import randn import pytest -from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype - import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, Timestamp +from pandas import DataFrame, MultiIndex, Series import pandas._testing as tm AGG_FUNCTIONS = [ @@ -27,22 +20,7 @@ ] -class Base: - def setup_method(self, method): - - # create test series object - arrays = [ - ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"], - ["one", "two", "one", "two", "one", "two", "one", "two"], - ] - tuples = zip(*arrays) - index = MultiIndex.from_tuples(tuples) - s = Series(randn(8), index=index) - s[3] = np.NaN - self.series = s - - -class TestMultiLevel(Base): +class TestMultiLevel: def test_append(self, multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data @@ -168,61 +146,6 @@ def test_reindex_preserve_levels( chunk = ymdT.loc[:, new_index] assert chunk.columns is new_index - def test_repr_to_string( - self, - multiindex_year_month_day_dataframe_random_data, - multiindex_dataframe_random_data, - ): - ymd = multiindex_year_month_day_dataframe_random_data - frame = multiindex_dataframe_random_data - - repr(frame) - repr(ymd) - repr(frame.T) - repr(ymd.T) - - buf = StringIO() - frame.to_string(buf=buf) - ymd.to_string(buf=buf) - frame.T.to_string(buf=buf) - ymd.T.to_string(buf=buf) - - def test_repr_name_coincide(self): - index = MultiIndex.from_tuples( - [("a", 0, "foo"), ("b", 1, "bar")], names=["a", "b", "c"] - ) - - df = DataFrame({"value": [0, 1]}, index=index) - - lines = repr(df).split("\n") - assert lines[2].startswith("a 0 foo") - - def test_delevel_infer_dtype(self): - tuples = list(product(["foo", "bar"], [10, 20], [1.0, 1.1])) - index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"]) - df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"], index=index) - deleveled = df.reset_index() - assert is_integer_dtype(deleveled["prm1"]) - assert is_float_dtype(deleveled["prm2"]) - - def test_reset_index_with_drop( - self, multiindex_year_month_day_dataframe_random_data - ): - ymd = multiindex_year_month_day_dataframe_random_data - - deleveled = ymd.reset_index(drop=True) - assert len(deleveled.columns) == len(ymd.columns) - assert deleveled.index.name == ymd.index.name - - deleveled = self.series.reset_index() - assert isinstance(deleveled, DataFrame) - assert len(deleveled.columns) == len(self.series.index.levels) + 1 - assert deleveled.index.name == self.series.index.name - - deleveled = self.series.reset_index(drop=True) - assert isinstance(deleveled, Series) - assert deleveled.index.name == self.series.index.name - def test_count_level_series(self): index = MultiIndex( levels=[["foo", "bar", "baz"], ["one", "two", "three", "four"]], @@ -243,14 +166,6 @@ def test_count_level_series(self): result.astype("f8"), expected.reindex(result.index).fillna(0) ) - def test_get_level_number_out_of_bounds(self, multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - - with pytest.raises(IndexError, match="Too many levels"): - frame.index._get_level_number(2) - with pytest.raises(IndexError, match="not a valid level number"): - frame.index._get_level_number(-3) - def test_unused_level_raises(self): # GH 20410 mi = MultiIndex( @@ -319,36 +234,6 @@ def test_join(self, multiindex_dataframe_random_data): # TODO what should join do with names ? tm.assert_frame_equal(joined, expected, check_names=False) - def test_swaplevel(self, multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - - swapped = frame["A"].swaplevel() - swapped2 = frame["A"].swaplevel(0) - swapped3 = frame["A"].swaplevel(0, 1) - swapped4 = frame["A"].swaplevel("first", "second") - assert not swapped.index.equals(frame.index) - tm.assert_series_equal(swapped, swapped2) - tm.assert_series_equal(swapped, swapped3) - tm.assert_series_equal(swapped, swapped4) - - back = swapped.swaplevel() - back2 = swapped.swaplevel(0) - back3 = swapped.swaplevel(0, 1) - back4 = swapped.swaplevel("second", "first") - assert back.index.equals(frame.index) - tm.assert_series_equal(back, back2) - tm.assert_series_equal(back, back3) - tm.assert_series_equal(back, back4) - - ft = frame.T - swapped = ft.swaplevel("first", "second", axis=1) - exp = frame.swaplevel("first", "second").T - tm.assert_frame_equal(swapped, exp) - - msg = "Can only swap levels on a hierarchical axis." - with pytest.raises(TypeError, match=msg): - DataFrame(range(3)).swaplevel() - def test_insert_index(self, multiindex_year_month_day_dataframe_random_data): ymd = multiindex_year_month_day_dataframe_random_data @@ -377,47 +262,20 @@ def test_alignment(self): exp = x.reindex(exp_index) - y.reindex(exp_index) tm.assert_series_equal(res, exp) - def test_count(self, multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - - frame = frame.copy() - frame.index.names = ["a", "b"] - - result = frame.count(level="b") - expect = frame.count(level=1) - tm.assert_frame_equal(result, expect, check_names=False) - - result = frame.count(level="a") - expect = frame.count(level=0) - tm.assert_frame_equal(result, expect, check_names=False) - - series = self.series.copy() - series.index.names = ["a", "b"] - - result = series.count(level="b") - expect = self.series.count(level=1).rename_axis("b") - tm.assert_series_equal(result, expect) - - result = series.count(level="a") - expect = self.series.count(level=0).rename_axis("a") - tm.assert_series_equal(result, expect) - - msg = "Level x not found" - with pytest.raises(KeyError, match=msg): - series.count("x") - with pytest.raises(KeyError, match=msg): - frame.count(level="x") - @pytest.mark.parametrize("op", AGG_FUNCTIONS) @pytest.mark.parametrize("level", [0, 1]) @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("sort", [True, False]) - def test_series_group_min_max(self, op, level, skipna, sort): + def test_series_group_min_max( + self, op, level, skipna, sort, series_with_multilevel_index + ): # GH 17537 - grouped = self.series.groupby(level=level, sort=sort) + ser = series_with_multilevel_index + + grouped = ser.groupby(level=level, sort=sort) # skipna=True leftside = grouped.agg(lambda x: getattr(x, op)(skipna=skipna)) - rightside = getattr(self.series, op)(level=level, skipna=skipna) + rightside = getattr(ser, op)(level=level, skipna=skipna) if sort: rightside = rightside.sort_index(level=level) tm.assert_series_equal(leftside, rightside) @@ -636,19 +494,6 @@ def test_join_segfault(self): for how in ["left", "right", "outer"]: df1.join(df2, how=how) - def test_frame_dict_constructor_empty_series(self): - s1 = Series( - [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)]) - ) - s2 = Series( - [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]) - ) - s3 = Series(dtype=object) - - # it works! - DataFrame({"foo": s1, "bar": s2, "baz": s3}) - DataFrame.from_dict({"foo": s1, "baz": s3, "bar": s2}) - @pytest.mark.parametrize("d", [4, "d"]) def test_empty_frame_groupby_dtypes_consistency(self, d): # GH 20888 @@ -663,37 +508,6 @@ def test_empty_frame_groupby_dtypes_consistency(self, d): tm.assert_index_equal(result, expected) - def test_multiindex_na_repr(self): - # only an issue with long columns - df3 = DataFrame( - { - "A" * 30: {("A", "A0006000", "nuit"): "A0006000"}, - "B" * 30: {("A", "A0006000", "nuit"): np.nan}, - "C" * 30: {("A", "A0006000", "nuit"): np.nan}, - "D" * 30: {("A", "A0006000", "nuit"): np.nan}, - "E" * 30: {("A", "A0006000", "nuit"): "A"}, - "F" * 30: {("A", "A0006000", "nuit"): np.nan}, - } - ) - - idf = df3.set_index(["A" * 30, "C" * 30]) - repr(idf) - - def test_assign_index_sequences(self): - # #2200 - df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}).set_index( - ["a", "b"] - ) - index = list(df.index) - index[0] = ("faz", "boo") - df.index = index - repr(df) - - # this travels an improper code path - index[0] = ["faz", "boo"] - df.index = index - repr(df) - def test_duplicate_groupby_issues(self): idx_tp = [ ("600809", "20061231"), @@ -731,186 +545,6 @@ def test_duplicate_mi(self): result = df.loc[("foo", "bar")] tm.assert_frame_equal(result, expected) - def test_multiindex_set_index(self): - # segfault in #3308 - d = {"t1": [2, 2.5, 3], "t2": [4, 5, 6]} - df = DataFrame(d) - tuples = [(0, 1), (0, 2), (1, 2)] - df["tuples"] = tuples - - index = MultiIndex.from_tuples(df["tuples"]) - # it works! - df.set_index(index) - - def test_reset_index_datetime(self, tz_naive_fixture): - # GH 3950 - tz = tz_naive_fixture - idx1 = pd.date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") - idx2 = Index(range(5), name="idx2", dtype="int64") - idx = MultiIndex.from_arrays([idx1, idx2]) - df = DataFrame( - {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, - index=idx, - ) - - expected = DataFrame( - { - "idx1": [ - datetime.datetime(2011, 1, 1), - datetime.datetime(2011, 1, 2), - datetime.datetime(2011, 1, 3), - datetime.datetime(2011, 1, 4), - datetime.datetime(2011, 1, 5), - ], - "idx2": np.arange(5, dtype="int64"), - "a": np.arange(5, dtype="int64"), - "b": ["A", "B", "C", "D", "E"], - }, - columns=["idx1", "idx2", "a", "b"], - ) - expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) - - tm.assert_frame_equal(df.reset_index(), expected) - - idx3 = pd.date_range( - "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" - ) - idx = MultiIndex.from_arrays([idx1, idx2, idx3]) - df = DataFrame( - {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, - index=idx, - ) - - expected = DataFrame( - { - "idx1": [ - datetime.datetime(2011, 1, 1), - datetime.datetime(2011, 1, 2), - datetime.datetime(2011, 1, 3), - datetime.datetime(2011, 1, 4), - datetime.datetime(2011, 1, 5), - ], - "idx2": np.arange(5, dtype="int64"), - "idx3": [ - datetime.datetime(2012, 1, 1), - datetime.datetime(2012, 2, 1), - datetime.datetime(2012, 3, 1), - datetime.datetime(2012, 4, 1), - datetime.datetime(2012, 5, 1), - ], - "a": np.arange(5, dtype="int64"), - "b": ["A", "B", "C", "D", "E"], - }, - columns=["idx1", "idx2", "idx3", "a", "b"], - ) - expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) - expected["idx3"] = expected["idx3"].apply( - lambda d: Timestamp(d, tz="Europe/Paris") - ) - tm.assert_frame_equal(df.reset_index(), expected) - - # GH 7793 - idx = MultiIndex.from_product( - [["a", "b"], pd.date_range("20130101", periods=3, tz=tz)] - ) - df = DataFrame( - np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx - ) - - expected = DataFrame( - { - "level_0": "a a a b b b".split(), - "level_1": [ - datetime.datetime(2013, 1, 1), - datetime.datetime(2013, 1, 2), - datetime.datetime(2013, 1, 3), - ] - * 2, - "a": np.arange(6, dtype="int64"), - }, - columns=["level_0", "level_1", "a"], - ) - expected["level_1"] = expected["level_1"].apply( - lambda d: Timestamp(d, freq="D", tz=tz) - ) - tm.assert_frame_equal(df.reset_index(), expected) - - def test_reset_index_period(self): - # GH 7746 - idx = MultiIndex.from_product( - [pd.period_range("20130101", periods=3, freq="M"), list("abc")], - names=["month", "feature"], - ) - - df = DataFrame( - np.arange(9, dtype="int64").reshape(-1, 1), index=idx, columns=["a"] - ) - expected = DataFrame( - { - "month": ( - [pd.Period("2013-01", freq="M")] * 3 - + [pd.Period("2013-02", freq="M")] * 3 - + [pd.Period("2013-03", freq="M")] * 3 - ), - "feature": ["a", "b", "c"] * 3, - "a": np.arange(9, dtype="int64"), - }, - columns=["month", "feature", "a"], - ) - tm.assert_frame_equal(df.reset_index(), expected) - - def test_reset_index_multiindex_columns(self): - levels = [["A", ""], ["B", "b"]] - df = DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels)) - result = df[["B"]].rename_axis("A").reset_index() - tm.assert_frame_equal(result, df) - - # gh-16120: already existing column - msg = r"cannot insert \('A', ''\), already exists" - with pytest.raises(ValueError, match=msg): - df.rename_axis("A").reset_index() - - # gh-16164: multiindex (tuple) full key - result = df.set_index([("A", "")]).reset_index() - tm.assert_frame_equal(result, df) - - # with additional (unnamed) index level - idx_col = DataFrame( - [[0], [1]], columns=MultiIndex.from_tuples([("level_0", "")]) - ) - expected = pd.concat([idx_col, df[[("B", "b"), ("A", "")]]], axis=1) - result = df.set_index([("B", "b")], append=True).reset_index() - tm.assert_frame_equal(result, expected) - - # with index name which is a too long tuple... - msg = "Item must have length equal to number of levels." - with pytest.raises(ValueError, match=msg): - df.rename_axis([("C", "c", "i")]).reset_index() - - # or too short... - levels = [["A", "a", ""], ["B", "b", "i"]] - df2 = DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels)) - idx_col = DataFrame( - [[0], [1]], columns=MultiIndex.from_tuples([("C", "c", "ii")]) - ) - expected = pd.concat([idx_col, df2], axis=1) - result = df2.rename_axis([("C", "c")]).reset_index(col_fill="ii") - tm.assert_frame_equal(result, expected) - - # ... which is incompatible with col_fill=None - with pytest.raises( - ValueError, - match=( - "col_fill=None is incompatible with " - r"incomplete column name \('C', 'c'\)" - ), - ): - df2.rename_axis([("C", "c")]).reset_index(col_fill=None) - - # with col_level != 0 - result = df2.rename_axis([("c", "ii")]).reset_index(col_level=1, col_fill="C") - tm.assert_frame_equal(result, expected) - def test_subsets_multiindex_dtype(self): # GH 20757 data = [["x", 1]] @@ -921,69 +555,9 @@ def test_subsets_multiindex_dtype(self): tm.assert_series_equal(result, expected) -class TestSorted(Base): +class TestSorted: """ everything you wanted to test about sorting """ - @pytest.mark.parametrize( - "gen,extra", - [ - ([1.0, 3.0, 2.0, 5.0], 4.0), - ([1, 3, 2, 5], 4), - ( - [ - Timestamp("20130101"), - Timestamp("20130103"), - Timestamp("20130102"), - Timestamp("20130105"), - ], - Timestamp("20130104"), - ), - (["1one", "3one", "2one", "5one"], "4one"), - ], - ) - def test_sorting_repr_8017(self, gen, extra): - - np.random.seed(0) - data = np.random.randn(3, 4) - - columns = MultiIndex.from_tuples([("red", i) for i in gen]) - df = DataFrame(data, index=list("def"), columns=columns) - df2 = pd.concat( - [ - df, - DataFrame( - "world", - index=list("def"), - columns=MultiIndex.from_tuples([("red", extra)]), - ), - ], - axis=1, - ) - - # check that the repr is good - # make sure that we have a correct sparsified repr - # e.g. only 1 header of read - assert str(df2).splitlines()[0].split() == ["red"] - - # GH 8017 - # sorting fails after columns added - - # construct single-dtype then sort - result = df.copy().sort_index(axis=1) - expected = df.iloc[:, [0, 2, 1, 3]] - tm.assert_frame_equal(result, expected) - - result = df2.sort_index(axis=1) - expected = df2.iloc[:, [0, 2, 1, 4, 3]] - tm.assert_frame_equal(result, expected) - - # setitem then sort - result = df.copy() - result[("red", extra)] = "world" - - result = result.sort_index(axis=1) - tm.assert_frame_equal(result, expected) - def test_sort_non_lexsorted(self): # degenerate case where we sort but don't # have a satisfying result :<