From 2354bf28146125daa153558212ce8e51bc46ea76 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 20 Jan 2020 12:12:18 -0800 Subject: [PATCH 1/4] TST: Add tests for fixed issues --- pandas/tests/frame/test_constructors.py | 18 +++++++++++++ pandas/tests/groupby/test_apply.py | 19 +++++++++++++ pandas/tests/groupby/test_groupby.py | 7 +++++ pandas/tests/indexing/test_loc.py | 10 +++++++ pandas/tests/io/test_pickle.py | 20 ++++++++++++++ pandas/tests/series/test_constructors.py | 9 +++++++ pandas/tests/test_multilevel.py | 34 ++++++++++++++++++++++++ 7 files changed, 117 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index a861e0eb52391..b1620df91ba26 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2433,6 +2433,24 @@ def test_datetime_date_tuple_columns_from_dict(self): expected = DataFrame([0, 1, 2], columns=pd.Index(pd.Series([tup]))) tm.assert_frame_equal(result, expected) + def test_construct_with_two_categoricalindex_series(self): + # GH 14600 + s1 = pd.Series( + [39, 6, 4], index=pd.CategoricalIndex(["female", "male", "unknown"]) + ) + s2 = pd.Series( + [2, 152, 2, 242, 150], + index=pd.CategoricalIndex(["f", "female", "m", "male", "unknown"]), + ) + result = pd.DataFrame([s1, s2]) + expected = pd.DataFrame( + np.array( + [[np.nan, 39.0, np.nan, 6.0, 4.0], [2.0, 152.0, 2.0, 242.0, 150.0]] + ), + columns=["f", "female", "m", "male", "unknown"], + ) + tm.assert_frame_equal(result, expected) + class TestDataFrameConstructorWithDatetimeTZ: def test_from_dict(self): diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 2f2f97f2cd993..75a6f022cc272 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -752,3 +752,22 @@ def most_common_values(df): ["17661101"], index=pd.DatetimeIndex(["2015-02-24"], name="day"), name="userId" ) tm.assert_series_equal(result, expected) + + +def test_groupby_apply_datetime_result_dtypes(): + # GH 14849 + data = pd.DataFrame.from_records( + [ + (pd.Timestamp(2016, 1, 1), "red", "dark", 1, "8"), + (pd.Timestamp(2015, 1, 1), "green", "stormy", 2, "9"), + (pd.Timestamp(2014, 1, 1), "blue", "bright", 3, "10"), + (pd.Timestamp(2013, 1, 1), "blue", "calm", 4, "potato"), + ], + columns=["observation", "color", "mood", "intensity", "score"], + ) + result = data.groupby("color").apply(lambda g: g.iloc[0]).dtypes + expected = Series( + [np.dtype("datetime64[ns]"), np.object, np.object, np.int64, np.object], + index=["observation", "color", "mood", "intensity", "score"], + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 7e374811d1960..eb9552fbbebc1 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1952,6 +1952,13 @@ def test_shift_bfill_ffill_tz(tz_naive_fixture, op, expected): tm.assert_frame_equal(result, expected) +def test_ffill_missing_arguments(): + # GH 14955 + df = pd.DataFrame({"a": [1, 2], "b": [1, 1]}) + with pytest.raises(ValueError, match="Must specify a fill"): + df.groupby("b").fillna() + + def test_groupby_only_none_group(): # see GH21624 # this was crashing with "ValueError: Length of passed values is 1, index implies 0" diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 78fcd15ab4cc1..4c1436b800fc3 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1002,3 +1002,13 @@ def test_loc_axis_1_slice(): ), ) tm.assert_frame_equal(result, expected) + + +def test_loc_set_dataframe_multiindex(): + # GH 14592 + expected = pd.DataFrame( + "a", index=range(2), columns=pd.MultiIndex.from_product([range(2), range(2)]) + ) + result = expected.copy() + result.loc[0, [(0, 1)]] = result.loc[0, [(0, 1)]] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 3d427dde573af..67aba8716b2d0 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -11,6 +11,7 @@ 3. Move the created pickle to "data/legacy_pickle/" directory. """ import bz2 +import datetime import glob import gzip import os @@ -487,3 +488,22 @@ def open(self, *args): df.to_pickle(mockurl) result = pd.read_pickle(mockurl) tm.assert_frame_equal(df, result) + + +class MyTz(datetime.tzinfo): + def __init__(self): + pass + + +def test_read_pickle_with_subclass(): + # GH 12163 + expected = pd.Series(dtype=object), MyTz() + + with tm.ensure_clean() as path: + with open(path, "wb") as f: + pickle.dump(expected, f) + + result = pd.read_pickle(path) + + tm.assert_series_equal(result[0], expected[0]) + assert isinstance(result[1], MyTz) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index d760939657d47..860cf49d93e76 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1413,3 +1413,12 @@ def test_constructor_tz_mixed_data(self): result = Series(dt_list) expected = Series(dt_list, dtype=object) tm.assert_series_equal(result, expected) + + def test_contructor_dict_tuple_indexer(self): + # GH 12948 + data = {(1, 1, None): -1.0} + result = Series(data) + expected = Series( + -1.0, index=MultiIndex(levels=[[1], [1], [np.nan]], codes=[[0], [0], [-1]]) + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 5382ad84bcca2..03909442d663c 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2147,6 +2147,40 @@ def test_sort_index_level_mixed(self): sorted_after.drop([("foo", "three")], axis=1), ) + def test_sort_index_categorical_multiindex(self): + # GH 15058 + df = DataFrame( + { + "a": np.arange(6), + "l1": pd.Categorical( + ["a", "a", "b", "b", "c", "c"], + categories=["c", "a", "b"], + ordered=True, + ), + "l2": [0, 1, 0, 1, 0, 1], + } + ) + result = df.set_index(["l1", "l2"]).sort_index() + expected = DataFrame( + [4, 5, 0, 1, 2, 3], + columns=["a"], + index=MultiIndex( + levels=[ + pd.CategoricalIndex( + ["c", "a", "b"], + categories=["c", "a", "b"], + ordered=True, + name="l1", + dtype="category", + ), + pd.Int64Index([0, 1], dtype="int64", name="l2"), + ], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=["l1", "l2"], + ), + ) + tm.assert_frame_equal(result, expected) + def test_is_lexsorted(self): levels = [[0, 1], [0, 1, 2]] From 2d794a0bb5adc772359114bac50e0192adb6b3a3 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 20 Jan 2020 13:42:05 -0800 Subject: [PATCH 2/4] Platform compat test --- pandas/tests/test_multilevel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 03909442d663c..975b03fdde67b 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2173,7 +2173,7 @@ def test_sort_index_categorical_multiindex(self): name="l1", dtype="category", ), - pd.Int64Index([0, 1], dtype="int64", name="l2"), + [0, 1], ], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], names=["l1", "l2"], From 37afd20fac24009abcc7b1df788d95a1150fbb67 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 20 Jan 2020 15:21:06 -0800 Subject: [PATCH 3/4] Use range instead --- pandas/tests/test_multilevel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 975b03fdde67b..1adc5011a0c31 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2151,7 +2151,7 @@ def test_sort_index_categorical_multiindex(self): # GH 15058 df = DataFrame( { - "a": np.arange(6), + "a": range(6), "l1": pd.Categorical( ["a", "a", "b", "b", "c", "c"], categories=["c", "a", "b"], From a026590a7be2913ae504e79c8f89b7f2632d8803 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 20 Jan 2020 15:26:07 -0800 Subject: [PATCH 4/4] Address comments --- pandas/tests/io/test_pickle.py | 7 +------ pandas/tests/series/test_constructors.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 67aba8716b2d0..22c4e38206df6 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -498,12 +498,7 @@ def __init__(self): def test_read_pickle_with_subclass(): # GH 12163 expected = pd.Series(dtype=object), MyTz() - - with tm.ensure_clean() as path: - with open(path, "wb") as f: - pickle.dump(expected, f) - - result = pd.read_pickle(path) + result = tm.round_trip_pickle(expected) tm.assert_series_equal(result[0], expected[0]) assert isinstance(result[1], MyTz) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 860cf49d93e76..2651c3d73c9ab 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1115,6 +1115,15 @@ def create_data(constructor): tm.assert_series_equal(result_datetime, expected) tm.assert_series_equal(result_Timestamp, expected) + def test_contructor_dict_tuple_indexer(self): + # GH 12948 + data = {(1, 1, None): -1.0} + result = Series(data) + expected = Series( + -1.0, index=MultiIndex(levels=[[1], [1], [np.nan]], codes=[[0], [0], [-1]]) + ) + tm.assert_series_equal(result, expected) + def test_constructor_mapping(self, non_mapping_dict_subclass): # GH 29788 ndm = non_mapping_dict_subclass({3: "three"}) @@ -1413,12 +1422,3 @@ def test_constructor_tz_mixed_data(self): result = Series(dt_list) expected = Series(dt_list, dtype=object) tm.assert_series_equal(result, expected) - - def test_contructor_dict_tuple_indexer(self): - # GH 12948 - data = {(1, 1, None): -1.0} - result = Series(data) - expected = Series( - -1.0, index=MultiIndex(levels=[[1], [1], [np.nan]], codes=[[0], [0], [-1]]) - ) - tm.assert_series_equal(result, expected)