diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 366ccf2fc9219..2d58a4391c80c 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -98,114 +98,7 @@ def test_setitem_list2(self): expected = Series(["1", "2"], df.columns, name=1) tm.assert_series_equal(result, expected) - def test_setitem_list_of_tuples(self, float_frame): - tuples = list(zip(float_frame["A"], float_frame["B"])) - float_frame["tuples"] = tuples - - result = float_frame["tuples"] - expected = Series(tuples, index=float_frame.index, name="tuples") - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "columns,box,expected", - [ - ( - ["A", "B", "C", "D"], - 7, - DataFrame( - [[7, 7, 7, 7], [7, 7, 7, 7], [7, 7, 7, 7]], - columns=["A", "B", "C", "D"], - ), - ), - ( - ["C", "D"], - [7, 8], - DataFrame( - [[1, 2, 7, 8], [3, 4, 7, 8], [5, 6, 7, 8]], - columns=["A", "B", "C", "D"], - ), - ), - ( - ["A", "B", "C"], - np.array([7, 8, 9], dtype=np.int64), - DataFrame([[7, 8, 9], [7, 8, 9], [7, 8, 9]], columns=["A", "B", "C"]), - ), - ( - ["B", "C", "D"], - [[7, 8, 9], [10, 11, 12], [13, 14, 15]], - DataFrame( - [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]], - columns=["A", "B", "C", "D"], - ), - ), - ( - ["C", "A", "D"], - np.array([[7, 8, 9], [10, 11, 12], [13, 14, 15]], dtype=np.int64), - DataFrame( - [[8, 2, 7, 9], [11, 4, 10, 12], [14, 6, 13, 15]], - columns=["A", "B", "C", "D"], - ), - ), - ( - ["A", "C"], - DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]), - DataFrame( - [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"] - ), - ), - ], - ) - def test_setitem_list_missing_columns(self, columns, box, expected): - # GH 29334 - df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) - df[columns] = box - tm.assert_frame_equal(df, expected) - - def test_setitem_multi_index(self): - # GH7655, test that assigning to a sub-frame of a frame - # with multi-index columns aligns both rows and columns - it = ["jim", "joe", "jolie"], ["first", "last"], ["left", "center", "right"] - - cols = MultiIndex.from_product(it) - index = date_range("20141006", periods=20) - vals = np.random.randint(1, 1000, (len(index), len(cols))) - df = DataFrame(vals, columns=cols, index=index) - - i, j = df.index.values.copy(), it[-1][:] - - np.random.shuffle(i) - df["jim"] = df["jolie"].loc[i, ::-1] - tm.assert_frame_equal(df["jim"], df["jolie"]) - - np.random.shuffle(j) - df[("joe", "first")] = df[("jolie", "last")].loc[i, j] - tm.assert_frame_equal(df[("joe", "first")], df[("jolie", "last")]) - - np.random.shuffle(j) - df[("joe", "last")] = df[("jolie", "first")].loc[i, j] - tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")]) - - @pytest.mark.parametrize( - "cols, values, expected", - [ - (["C", "D", "D", "a"], [1, 2, 3, 4], 4), # with duplicates - (["D", "C", "D", "a"], [1, 2, 3, 4], 4), # mixed order - (["C", "B", "B", "a"], [1, 2, 3, 4], 4), # other duplicate cols - (["C", "B", "a"], [1, 2, 3], 3), # no duplicates - (["B", "C", "a"], [3, 2, 1], 1), # alphabetical order - (["C", "a", "B"], [3, 2, 1], 2), # in the middle - ], - ) - def test_setitem_same_column(self, cols, values, expected): - # GH 23239 - df = DataFrame([values], columns=cols) - df["a"] = df["a"] - result = df["a"].values[0] - assert result == expected - - def test_getitem_boolean( - self, float_string_frame, mixed_float_frame, mixed_int_frame, datetime_frame - ): + def test_getitem_boolean(self, mixed_float_frame, mixed_int_frame, datetime_frame): # boolean indexing d = datetime_frame.index[10] indexer = datetime_frame.index > d @@ -242,12 +135,9 @@ def test_getitem_boolean( # test df[df > 0] for df in [ datetime_frame, - float_string_frame, mixed_float_frame, mixed_int_frame, ]: - if df is float_string_frame: - continue data = df._get_numeric_data() bif = df[df > 0] @@ -348,6 +238,7 @@ def test_getitem_ix_mixed_integer(self): expected = df.loc[Index([1, 10])] tm.assert_frame_equal(result, expected) + def test_getitem_ix_mixed_integer2(self): # 11320 df = DataFrame( { @@ -419,6 +310,7 @@ def test_setitem(self, float_frame): assert smaller["col10"].dtype == np.object_ assert (smaller["col10"] == ["1", "2"]).all() + def test_setitem2(self): # dtype changing GH4204 df = DataFrame([[0, 0]]) df.iloc[0] = np.nan @@ -508,34 +400,6 @@ def test_setitem_cast(self, float_frame): float_frame["something"] = 2.5 assert float_frame["something"].dtype == np.float64 - # GH 7704 - # dtype conversion on setting - df = DataFrame(np.random.rand(30, 3), columns=tuple("ABC")) - df["event"] = np.nan - df.loc[10, "event"] = "foo" - result = df.dtypes - expected = Series( - [np.dtype("float64")] * 3 + [np.dtype("object")], - index=["A", "B", "C", "event"], - ) - tm.assert_series_equal(result, expected) - - # Test that data type is preserved . #5782 - df = DataFrame({"one": np.arange(6, dtype=np.int8)}) - df.loc[1, "one"] = 6 - assert df.dtypes.one == np.dtype(np.int8) - df.one = np.int8(7) - assert df.dtypes.one == np.dtype(np.int8) - - def test_setitem_boolean_column(self, float_frame): - expected = float_frame.copy() - mask = float_frame["A"] > 0 - - float_frame.loc[mask, "B"] = 0 - expected.values[mask.values, 1] = 0 - - tm.assert_frame_equal(float_frame, expected) - def test_setitem_corner(self, float_frame): # corner case df = DataFrame({"B": [1.0, 2.0, 3.0], "C": ["a", "b", "c"]}, index=np.arange(3)) @@ -908,17 +772,6 @@ def test_getitem_setitem_float_labels(self): result = cp.loc[1.0:5.0] assert (result == 0).values.all() - def test_setitem_single_column_mixed(self): - df = DataFrame( - np.random.randn(5, 3), - index=["a", "b", "c", "d", "e"], - columns=["foo", "bar", "baz"], - ) - df["str"] = "qux" - df.loc[df.index[::2], "str"] = np.nan - expected = np.array([np.nan, "qux", np.nan, "qux", np.nan], dtype=object) - tm.assert_almost_equal(df["str"].values, expected) - def test_setitem_single_column_mixed_datetime(self): df = DataFrame( np.random.randn(5, 3), @@ -1182,24 +1035,6 @@ def test_iloc_col(self): expected = df.reindex(columns=df.columns[[1, 2, 4, 6]]) tm.assert_frame_equal(result, expected) - def test_iloc_duplicates(self): - - df = DataFrame(np.random.rand(3, 3), columns=list("ABC"), index=list("aab")) - - result = df.iloc[0] - assert isinstance(result, Series) - tm.assert_almost_equal(result.values, df.values[0]) - - result = df.T.iloc[:, 0] - assert isinstance(result, Series) - tm.assert_almost_equal(result.values, df.values[0]) - - # #2259 - df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1, 1, 2]) - result = df.iloc[:, [0]] - expected = df.take([0], axis=1) - tm.assert_frame_equal(result, expected) - def test_loc_duplicates(self): # gh-17105 @@ -1227,10 +1062,6 @@ def test_loc_duplicates(self): df.loc[trange[bool_idx], "A"] += 6 tm.assert_frame_equal(df, expected) - def test_set_dataframe_column_ns_dtype(self): - x = DataFrame([datetime.now(), datetime.now()]) - assert x[0].dtype == np.dtype("M8[ns]") - def test_setitem_with_unaligned_tz_aware_datetime_column(self): # GH 12981 # Assignment of unaligned offset-aware datetime series. @@ -1266,33 +1097,6 @@ def test_loc_setitem_datetimelike_with_inference(self): ) tm.assert_series_equal(result, expected) - def test_loc_getitem_index_namedtuple(self): - from collections import namedtuple - - IndexType = namedtuple("IndexType", ["a", "b"]) - idx1 = IndexType("foo", "bar") - idx2 = IndexType("baz", "bof") - index = Index([idx1, idx2], name="composite_index", tupleize_cols=False) - df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"]) - - result = df.loc[IndexType("foo", "bar")]["A"] - assert result == 1 - - @pytest.mark.parametrize("tpl", [(1,), (1, 2)]) - def test_loc_getitem_index_single_double_tuples(self, tpl): - # GH 20991 - idx = Index( - [(1,), (1, 2)], - name="A", - tupleize_cols=False, - ) - df = DataFrame(index=idx) - - result = df.loc[[tpl]] - idx = Index([tpl], name="A", tupleize_cols=False) - expected = DataFrame(index=idx) - tm.assert_frame_equal(result, expected) - def test_getitem_boolean_indexing_mixed(self): df = DataFrame( { @@ -1346,7 +1150,7 @@ def test_type_error_multiindex(self): data=[[0, 0, 1, 2], [1, 0, 3, 4], [0, 1, 1, 2], [1, 1, 3, 4]], ) dg = df.pivot_table(index="i", columns="c", values=["x", "y"]) - + # TODO: Is this test for pivot_table? with pytest.raises(TypeError, match="unhashable type"): dg[:, 0] @@ -1366,27 +1170,6 @@ def test_type_error_multiindex(self): result = dg["x", 0] tm.assert_series_equal(result, expected) - def test_loc_getitem_interval_index(self): - # GH 19977 - index = pd.interval_range(start=0, periods=3) - df = DataFrame( - [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"] - ) - - expected = 1 - result = df.loc[0.5, "A"] - tm.assert_almost_equal(result, expected) - - index = pd.interval_range(start=0, periods=3, closed="both") - df = DataFrame( - [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"] - ) - - index_exp = pd.interval_range(start=0, periods=2, freq=1, closed="both") - expected = Series([1, 4], index=index_exp, name="A") - result = df.loc[1, "A"] - tm.assert_series_equal(result, expected) - def test_getitem_interval_index_partial_indexing(self): # GH#36490 df = DataFrame( diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 7e3de9a5ae67c..f2edfed019bdb 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -23,6 +23,7 @@ Index, Interval, IntervalIndex, + MultiIndex, NaT, Period, PeriodIndex, @@ -467,6 +468,111 @@ def test_setitem_with_empty_listlike(self): expected = DataFrame(columns=["A"], index=index) tm.assert_index_equal(result.index, expected.index) + @pytest.mark.parametrize( + "cols, values, expected", + [ + (["C", "D", "D", "a"], [1, 2, 3, 4], 4), # with duplicates + (["D", "C", "D", "a"], [1, 2, 3, 4], 4), # mixed order + (["C", "B", "B", "a"], [1, 2, 3, 4], 4), # other duplicate cols + (["C", "B", "a"], [1, 2, 3], 3), # no duplicates + (["B", "C", "a"], [3, 2, 1], 1), # alphabetical order + (["C", "a", "B"], [3, 2, 1], 2), # in the middle + ], + ) + def test_setitem_same_column(self, cols, values, expected): + # GH#23239 + df = DataFrame([values], columns=cols) + df["a"] = df["a"] + result = df["a"].values[0] + assert result == expected + + def test_setitem_multi_index(self): + # GH#7655, test that assigning to a sub-frame of a frame + # with multi-index columns aligns both rows and columns + it = ["jim", "joe", "jolie"], ["first", "last"], ["left", "center", "right"] + + cols = MultiIndex.from_product(it) + index = date_range("20141006", periods=20) + vals = np.random.randint(1, 1000, (len(index), len(cols))) + df = DataFrame(vals, columns=cols, index=index) + + i, j = df.index.values.copy(), it[-1][:] + + np.random.shuffle(i) + df["jim"] = df["jolie"].loc[i, ::-1] + tm.assert_frame_equal(df["jim"], df["jolie"]) + + np.random.shuffle(j) + df[("joe", "first")] = df[("jolie", "last")].loc[i, j] + tm.assert_frame_equal(df[("joe", "first")], df[("jolie", "last")]) + + np.random.shuffle(j) + df[("joe", "last")] = df[("jolie", "first")].loc[i, j] + tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")]) + + @pytest.mark.parametrize( + "columns,box,expected", + [ + ( + ["A", "B", "C", "D"], + 7, + DataFrame( + [[7, 7, 7, 7], [7, 7, 7, 7], [7, 7, 7, 7]], + columns=["A", "B", "C", "D"], + ), + ), + ( + ["C", "D"], + [7, 8], + DataFrame( + [[1, 2, 7, 8], [3, 4, 7, 8], [5, 6, 7, 8]], + columns=["A", "B", "C", "D"], + ), + ), + ( + ["A", "B", "C"], + np.array([7, 8, 9], dtype=np.int64), + DataFrame([[7, 8, 9], [7, 8, 9], [7, 8, 9]], columns=["A", "B", "C"]), + ), + ( + ["B", "C", "D"], + [[7, 8, 9], [10, 11, 12], [13, 14, 15]], + DataFrame( + [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]], + columns=["A", "B", "C", "D"], + ), + ), + ( + ["C", "A", "D"], + np.array([[7, 8, 9], [10, 11, 12], [13, 14, 15]], dtype=np.int64), + DataFrame( + [[8, 2, 7, 9], [11, 4, 10, 12], [14, 6, 13, 15]], + columns=["A", "B", "C", "D"], + ), + ), + ( + ["A", "C"], + DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]), + DataFrame( + [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"] + ), + ), + ], + ) + def test_setitem_list_missing_columns(self, columns, box, expected): + # GH#29334 + df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) + df[columns] = box + tm.assert_frame_equal(df, expected) + + def test_setitem_list_of_tuples(self, float_frame): + tuples = list(zip(float_frame["A"], float_frame["B"])) + float_frame["tuples"] = tuples + + result = float_frame["tuples"] + expected = Series(tuples, index=float_frame.index, name="tuples") + tm.assert_series_equal(result, expected) + class TestSetitemTZAwareValues: @pytest.fixture diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index 0f51c4aef79db..84f5fa2021e2a 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -467,3 +467,19 @@ def test_drop_with_duplicate_columns(self): tm.assert_frame_equal(result, expected) result = df.drop("a", axis=1) tm.assert_frame_equal(result, expected) + + def test_drop_with_duplicate_columns2(self): + # drop buggy GH#6240 + df = DataFrame( + { + "A": np.random.randn(5), + "B": np.random.randn(5), + "C": np.random.randn(5), + "D": ["a", "b", "c", "d", "e"], + } + ) + + expected = df.take([0, 1, 1], axis=1) + df2 = df.take([2, 0, 1, 2, 1], axis=1) + result = df2.drop("C", axis=1) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index e28c716544209..b671bb1afb27a 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -210,3 +210,24 @@ def test_dropna_categorical_interval_index(self): expected = df result = df.dropna() tm.assert_frame_equal(result, expected) + + def test_dropna_with_duplicate_columns(self): + df = DataFrame( + { + "A": np.random.randn(5), + "B": np.random.randn(5), + "C": np.random.randn(5), + "D": ["a", "b", "c", "d", "e"], + } + ) + df.iloc[2, [0, 1, 2]] = np.nan + df.iloc[0, 0] = np.nan + df.iloc[1, 1] = np.nan + df.iloc[:, 3] = np.nan + expected = df.dropna(subset=["A", "B", "C"], how="all") + expected.columns = ["A", "A", "B", "C"] + + df.columns = ["A", "A", "B", "C"] + + result = df.dropna(subset=["A", "C"], how="all") + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 79918ee1fb1b2..cb83dfba683b7 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -64,6 +64,10 @@ class TestDataFrameConstructors: + def test_construct_from_list_of_datetimes(self): + df = DataFrame([datetime.now(), datetime.now()]) + assert df[0].dtype == np.dtype("M8[ns]") + def test_constructor_from_tzaware_datetimeindex(self): # don't cast a DatetimeIndex WITH a tz, leave as object # GH#6032 diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index c3812e109b938..6a6e2a5aa2636 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -195,45 +195,6 @@ def test_changing_dtypes_with_duplicate_columns(self): df["that"] = 1 check(df, expected) - def test_column_dups_drop(self): - - # drop buggy GH 6240 - df = DataFrame( - { - "A": np.random.randn(5), - "B": np.random.randn(5), - "C": np.random.randn(5), - "D": ["a", "b", "c", "d", "e"], - } - ) - - expected = df.take([0, 1, 1], axis=1) - df2 = df.take([2, 0, 1, 2, 1], axis=1) - result = df2.drop("C", axis=1) - tm.assert_frame_equal(result, expected) - - def test_column_dups_dropna(self): - # dropna - df = DataFrame( - { - "A": np.random.randn(5), - "B": np.random.randn(5), - "C": np.random.randn(5), - "D": ["a", "b", "c", "d", "e"], - } - ) - df.iloc[2, [0, 1, 2]] = np.nan - df.iloc[0, 0] = np.nan - df.iloc[1, 1] = np.nan - df.iloc[:, 3] = np.nan - expected = df.dropna(subset=["A", "B", "C"], how="all") - expected.columns = ["A", "A", "B", "C"] - - df.columns = ["A", "A", "B", "C"] - - result = df.dropna(subset=["A", "C"], how="all") - tm.assert_frame_equal(result, expected) - def test_dup_columns_comparisons(self): # equality df1 = DataFrame([[1, 2], [2, np.nan], [3, 4], [4, 4]], columns=["A", "B"]) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 696693ec158ca..43ffc9e8eaedd 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1040,6 +1040,25 @@ def view(self): tm.assert_frame_equal(result, df) + def test_iloc_getitem_with_duplicates(self): + + df = DataFrame(np.random.rand(3, 3), columns=list("ABC"), index=list("aab")) + + result = df.iloc[0] + assert isinstance(result, Series) + tm.assert_almost_equal(result.values, df.values[0]) + + result = df.T.iloc[:, 0] + assert isinstance(result, Series) + tm.assert_almost_equal(result.values, df.values[0]) + + def test_iloc_getitem_with_duplicates2(self): + # GH#2259 + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1, 1, 2]) + result = df.iloc[:, [0]] + expected = df.take([0], axis=1) + tm.assert_frame_equal(result, expected) + class TestILocErrors: # NB: this test should work for _any_ Series we can pass as diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 3726bbecde827..466e60e84b318 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1,4 +1,5 @@ """ test label based indexing with loc """ +from collections import namedtuple from datetime import ( date, datetime, @@ -1263,6 +1264,86 @@ def test_loc_setitem_2d_to_1d_raises(self): with pytest.raises(ValueError, match=msg): ser.loc[:] = data + def test_loc_getitem_interval_index(self): + # GH#19977 + index = pd.interval_range(start=0, periods=3) + df = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"] + ) + + expected = 1 + result = df.loc[0.5, "A"] + tm.assert_almost_equal(result, expected) + + def test_loc_getitem_interval_index2(self): + # GH#19977 + index = pd.interval_range(start=0, periods=3, closed="both") + df = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"] + ) + + index_exp = pd.interval_range(start=0, periods=2, freq=1, closed="both") + expected = Series([1, 4], index=index_exp, name="A") + result = df.loc[1, "A"] + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("tpl", [(1,), (1, 2)]) + def test_loc_getitem_index_single_double_tuples(self, tpl): + # GH#20991 + idx = Index( + [(1,), (1, 2)], + name="A", + tupleize_cols=False, + ) + df = DataFrame(index=idx) + + result = df.loc[[tpl]] + idx = Index([tpl], name="A", tupleize_cols=False) + expected = DataFrame(index=idx) + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_index_namedtuple(self): + IndexType = namedtuple("IndexType", ["a", "b"]) + idx1 = IndexType("foo", "bar") + idx2 = IndexType("baz", "bof") + index = Index([idx1, idx2], name="composite_index", tupleize_cols=False) + df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"]) + + result = df.loc[IndexType("foo", "bar")]["A"] + assert result == 1 + + def test_loc_setitem_single_column_mixed(self): + df = DataFrame( + np.random.randn(5, 3), + index=["a", "b", "c", "d", "e"], + columns=["foo", "bar", "baz"], + ) + df["str"] = "qux" + df.loc[df.index[::2], "str"] = np.nan + expected = np.array([np.nan, "qux", np.nan, "qux", np.nan], dtype=object) + tm.assert_almost_equal(df["str"].values, expected) + + def test_loc_setitem_cast2(self): + # GH#7704 + # dtype conversion on setting + df = DataFrame(np.random.rand(30, 3), columns=tuple("ABC")) + df["event"] = np.nan + df.loc[10, "event"] = "foo" + result = df.dtypes + expected = Series( + [np.dtype("float64")] * 3 + [np.dtype("object")], + index=["A", "B", "C", "event"], + ) + tm.assert_series_equal(result, expected) + + def test_loc_setitem_cast3(self): + # Test that data type is preserved . GH#5782 + df = DataFrame({"one": np.arange(6, dtype=np.int8)}) + df.loc[1, "one"] = 6 + assert df.dtypes.one == np.dtype(np.int8) + df.one = np.int8(7) + assert df.dtypes.one == np.dtype(np.int8) + class TestLocWithMultiIndex: @pytest.mark.parametrize( @@ -1937,6 +2018,15 @@ def test_loc_setitem_mask_td64_series_value(self): assert expected == result tm.assert_frame_equal(df, df_copy) + def test_loc_setitem_boolean_and_column(self, float_frame): + expected = float_frame.copy() + mask = float_frame["A"] > 0 + + float_frame.loc[mask, "B"] = 0 + expected.values[mask.values, 1] = 0 + + tm.assert_frame_equal(float_frame, expected) + class TestLocListlike: @pytest.mark.parametrize("box", [lambda x: x, np.asarray, list]) @@ -2384,3 +2474,29 @@ def test_loc_series_getitem_too_many_dimensions(self, indexer): with pytest.raises(ValueError, match=msg): ser.loc[indexer, :] = 1 + + def test_loc_setitem(self, string_series): + inds = string_series.index[[3, 4, 7]] + + result = string_series.copy() + result.loc[inds] = 5 + + expected = string_series.copy() + expected[[3, 4, 7]] = 5 + tm.assert_series_equal(result, expected) + + result.iloc[5:10] = 10 + expected[5:10] = 10 + tm.assert_series_equal(result, expected) + + # set slice with indices + d1, d2 = string_series.index[[5, 15]] + result.loc[d1:d2] = 6 + expected[5:16] = 6 # because it's inclusive + tm.assert_series_equal(result, expected) + + # set index value + string_series.loc[d1] = 4 + string_series.loc[d2] = 6 + assert string_series[d1] == 4 + assert string_series[d2] == 6 diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 8098b195c3838..1de6540217655 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -402,6 +402,9 @@ def compare(slobj): expected.index = expected.index._with_freq(None) tm.assert_series_equal(result, expected) + +def test_indexing_unordered2(): + # diff freq rng = date_range(datetime(2005, 1, 1), periods=20, freq="M") ts = Series(np.arange(len(rng)), index=rng) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 34ba20c03b732..cd5a7af1d5ec0 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -7,7 +7,6 @@ import pandas as pd from pandas import ( - Categorical, DataFrame, IndexSlice, MultiIndex, @@ -175,14 +174,6 @@ def test_setitem(datetime_series, string_series): assert not np.isnan(datetime_series[2]) -def test_setitem_slicestep(): - # caught this bug when writing tests - series = Series(tm.makeIntIndex(20).astype(float), index=tm.makeIntIndex(20)) - - series[::2] = 0 - assert (series[::2] == 0).all() - - def test_setslice(datetime_series): sl = datetime_series[5:20] assert len(sl) == len(sl.index) @@ -214,43 +205,6 @@ def test_basic_getitem_setitem_corner(datetime_series): datetime_series[[5, slice(None, None)]] = 2 -def test_setitem_categorical_assigning_ops(): - orig = Series(Categorical(["b", "b"], categories=["a", "b"])) - s = orig.copy() - s[:] = "a" - exp = Series(Categorical(["a", "a"], categories=["a", "b"])) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s[1] = "a" - exp = Series(Categorical(["b", "a"], categories=["a", "b"])) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s[s.index > 0] = "a" - exp = Series(Categorical(["b", "a"], categories=["a", "b"])) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s[[False, True]] = "a" - exp = Series(Categorical(["b", "a"], categories=["a", "b"])) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.index = ["x", "y"] - s["y"] = "a" - exp = Series(Categorical(["b", "a"], categories=["a", "b"]), index=["x", "y"]) - tm.assert_series_equal(s, exp) - - -def test_setitem_nan_into_categorical(): - # ensure that one can set something to np.nan - ser = Series(Categorical([1, 2, 3])) - exp = Series(Categorical([1, np.nan, 3], categories=[1, 2, 3])) - ser[1] = np.nan - tm.assert_series_equal(ser, exp) - - def test_slice(string_series, object_series): numSlice = string_series[10:20] numSliceEnd = string_series[-10:] @@ -272,33 +226,6 @@ def test_slice(string_series, object_series): assert (string_series[10:20] == 0).all() -def test_loc_setitem(string_series): - inds = string_series.index[[3, 4, 7]] - - result = string_series.copy() - result.loc[inds] = 5 - - expected = string_series.copy() - expected[[3, 4, 7]] = 5 - tm.assert_series_equal(result, expected) - - result.iloc[5:10] = 10 - expected[5:10] = 10 - tm.assert_series_equal(result, expected) - - # set slice with indices - d1, d2 = string_series.index[[5, 15]] - result.loc[d1:d2] = 6 - expected[5:16] = 6 # because it's inclusive - tm.assert_series_equal(result, expected) - - # set index value - string_series.loc[d1] = 4 - string_series.loc[d2] = 6 - assert string_series[d1] == 4 - assert string_series[d2] == 6 - - def test_timedelta_assignment(): # GH 8209 s = Series([], dtype=object) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index bbe328114fd20..36ade2c8b8b43 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -7,6 +7,7 @@ import pytest from pandas import ( + Categorical, DatetimeIndex, Index, MultiIndex, @@ -193,6 +194,13 @@ def test_setitem_slice_integers(self): assert (ser[:4] == 0).all() assert not (ser[4:] == 0).any() + def test_setitem_slicestep(self): + # caught this bug when writing tests + series = Series(tm.makeIntIndex(20).astype(float), index=tm.makeIntIndex(20)) + + series[::2] = 0 + assert (series[::2] == 0).all() + class TestSetitemBooleanMask: def test_setitem_boolean(self, string_series): @@ -427,6 +435,43 @@ def test_setitem_slice_into_readonly_backing_data(): assert not array.any() +def test_setitem_categorical_assigning_ops(): + orig = Series(Categorical(["b", "b"], categories=["a", "b"])) + ser = orig.copy() + ser[:] = "a" + exp = Series(Categorical(["a", "a"], categories=["a", "b"])) + tm.assert_series_equal(ser, exp) + + ser = orig.copy() + ser[1] = "a" + exp = Series(Categorical(["b", "a"], categories=["a", "b"])) + tm.assert_series_equal(ser, exp) + + ser = orig.copy() + ser[ser.index > 0] = "a" + exp = Series(Categorical(["b", "a"], categories=["a", "b"])) + tm.assert_series_equal(ser, exp) + + ser = orig.copy() + ser[[False, True]] = "a" + exp = Series(Categorical(["b", "a"], categories=["a", "b"])) + tm.assert_series_equal(ser, exp) + + ser = orig.copy() + ser.index = ["x", "y"] + ser["y"] = "a" + exp = Series(Categorical(["b", "a"], categories=["a", "b"]), index=["x", "y"]) + tm.assert_series_equal(ser, exp) + + +def test_setitem_nan_into_categorical(): + # ensure that one can set something to np.nan + ser = Series(Categorical([1, 2, 3])) + exp = Series(Categorical([1, np.nan, 3], categories=[1, 2, 3])) + ser[1] = np.nan + tm.assert_series_equal(ser, exp) + + class TestSetitemCasting: @pytest.mark.parametrize("unique", [True, False]) @pytest.mark.parametrize("val", [3, 3.0, "3"], ids=type)