diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index c487777fc339e..82d55a7bf7189 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -22,6 +22,213 @@ import pandas._testing as tm +class TestEmptyFrameSetitemExpansion: + def test_empty_frame_setitem_index_name_retained(self): + # GH#31368 empty frame has non-None index.name -> retained + df = DataFrame({}, index=pd.RangeIndex(0, name="df_index")) + series = Series(1.23, index=pd.RangeIndex(4, name="series_index")) + + df["series"] = series + expected = DataFrame( + {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="df_index") + ) + + tm.assert_frame_equal(df, expected) + + def test_empty_frame_setitem_index_name_inherited(self): + # GH#36527 empty frame has None index.name -> not retained + df = DataFrame() + series = Series(1.23, index=pd.RangeIndex(4, name="series_index")) + df["series"] = series + expected = DataFrame( + {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="series_index") + ) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_zerolen_series_columns_align(self): + # columns will align + df = DataFrame(columns=["A", "B"]) + df.loc[0] = Series(1, index=range(4)) + expected = DataFrame(columns=["A", "B"], index=[0], dtype=np.float64) + tm.assert_frame_equal(df, expected) + + # columns will align + df = DataFrame(columns=["A", "B"]) + df.loc[0] = Series(1, index=["B"]) + + exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64") + tm.assert_frame_equal(df, exp) + + def test_loc_setitem_zerolen_list_length_must_match_columns(self): + # list-like must conform + df = DataFrame(columns=["A", "B"]) + + msg = "cannot set a row with mismatched columns" + with pytest.raises(ValueError, match=msg): + df.loc[0] = [1, 2, 3] + + df = DataFrame(columns=["A", "B"]) + df.loc[3] = [6, 7] # length matches len(df.columns) --> OK! + + exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=np.int64) + tm.assert_frame_equal(df, exp) + + def test_partial_set_empty_frame(self): + + # partially set with an empty object + # frame + df = DataFrame() + + msg = "cannot set a frame with no defined columns" + + with pytest.raises(ValueError, match=msg): + df.loc[1] = 1 + + with pytest.raises(ValueError, match=msg): + df.loc[1] = Series([1], index=["foo"]) + + msg = "cannot set a frame with no defined index and a scalar" + with pytest.raises(ValueError, match=msg): + df.loc[:, 1] = 1 + + def test_partial_set_empty_frame2(self): + # these work as they don't really change + # anything but the index + # GH#5632 + expected = DataFrame(columns=["foo"], index=Index([], dtype="object")) + + df = DataFrame(index=Index([], dtype="object")) + df["foo"] = Series([], dtype="object") + + tm.assert_frame_equal(df, expected) + + df = DataFrame() + df["foo"] = Series(df.index) + + tm.assert_frame_equal(df, expected) + + df = DataFrame() + df["foo"] = df.index + + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame3(self): + expected = DataFrame(columns=["foo"], index=Index([], dtype="int64")) + expected["foo"] = expected["foo"].astype("float64") + + df = DataFrame(index=Index([], dtype="int64")) + df["foo"] = [] + + tm.assert_frame_equal(df, expected) + + df = DataFrame(index=Index([], dtype="int64")) + df["foo"] = Series(np.arange(len(df)), dtype="float64") + + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame4(self): + df = DataFrame(index=Index([], dtype="int64")) + df["foo"] = range(len(df)) + + expected = DataFrame(columns=["foo"], index=Index([], dtype="int64")) + # range is int-dtype-like, so we get int64 dtype + expected["foo"] = expected["foo"].astype("int64") + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame5(self): + df = DataFrame() + tm.assert_index_equal(df.columns, Index([], dtype=object)) + df2 = DataFrame() + df2[1] = Series([1], index=["foo"]) + df.loc[:, 1] = Series([1], index=["foo"]) + tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1])) + tm.assert_frame_equal(df, df2) + + def test_partial_set_empty_frame_no_index(self): + # no index to start + expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0]) + + df = DataFrame(columns=["A", "B"]) + df[0] = Series(1, index=range(4)) + df.dtypes + str(df) + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=["A", "B"]) + df.loc[:, 0] = Series(1, index=range(4)) + df.dtypes + str(df) + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame_row(self): + # GH#5720, GH#5744 + # don't create rows when empty + expected = DataFrame(columns=["A", "B", "New"], index=Index([], dtype="int64")) + expected["A"] = expected["A"].astype("int64") + expected["B"] = expected["B"].astype("float64") + expected["New"] = expected["New"].astype("float64") + + df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) + y = df[df.A > 5] + y["New"] = np.nan + tm.assert_frame_equal(y, expected) + + expected = DataFrame(columns=["a", "b", "c c", "d"]) + expected["d"] = expected["d"].astype("int64") + df = DataFrame(columns=["a", "b", "c c"]) + df["d"] = 3 + tm.assert_frame_equal(df, expected) + tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object)) + + # reindex columns is ok + df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) + y = df[df.A > 5] + result = y.reindex(columns=["A", "B", "C"]) + expected = DataFrame(columns=["A", "B", "C"], index=Index([], dtype="int64")) + expected["A"] = expected["A"].astype("int64") + expected["B"] = expected["B"].astype("float64") + expected["C"] = expected["C"].astype("float64") + tm.assert_frame_equal(result, expected) + + def test_partial_set_empty_frame_set_series(self): + # GH#5756 + # setting with empty Series + df = DataFrame(Series(dtype=object)) + expected = DataFrame({0: Series(dtype=object)}) + tm.assert_frame_equal(df, expected) + + df = DataFrame(Series(name="foo", dtype=object)) + expected = DataFrame({"foo": Series(dtype=object)}) + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame_empty_copy_assignment(self): + # GH#5932 + # copy on empty with assignment fails + df = DataFrame(index=[0]) + df = df.copy() + df["a"] = 0 + expected = DataFrame(0, index=[0], columns=["a"]) + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame_empty_consistencies(self): + # GH#6171 + # consistency on empty frames + df = DataFrame(columns=["x", "y"]) + df["x"] = [1, 2] + expected = DataFrame({"x": [1, 2], "y": [np.nan, np.nan]}) + tm.assert_frame_equal(df, expected, check_dtype=False) + + df = DataFrame(columns=["x", "y"]) + df["x"] = ["1", "2"] + expected = DataFrame({"x": ["1", "2"], "y": [np.nan, np.nan]}, dtype=object) + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=["x", "y"]) + df.loc[0, "x"] = 1 + expected = DataFrame({"x": [1], "y": [np.nan]}) + tm.assert_frame_equal(df, expected, check_dtype=False) + + class TestPartialSetting: def test_partial_setting(self): @@ -61,8 +268,7 @@ def test_partial_setting(self): with pytest.raises(IndexError, match=msg): s.iat[3] = 5.0 - # ## frame ## - + def test_partial_setting_frame(self): df_orig = DataFrame( np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64" ) @@ -166,33 +372,6 @@ def test_partial_setting_mixed_dtype(self): df.loc[2] = df.loc[1] tm.assert_frame_equal(df, expected) - # columns will align - df = DataFrame(columns=["A", "B"]) - df.loc[0] = Series(1, index=range(4)) - expected = DataFrame(columns=["A", "B"], index=[0], dtype=np.float64) - tm.assert_frame_equal(df, expected) - - # columns will align - # TODO: it isn't great that this behavior depends on consolidation - df = DataFrame(columns=["A", "B"])._consolidate() - df.loc[0] = Series(1, index=["B"]) - - exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64") - tm.assert_frame_equal(df, exp) - - # list-like must conform - df = DataFrame(columns=["A", "B"]) - - msg = "cannot set a row with mismatched columns" - with pytest.raises(ValueError, match=msg): - df.loc[0] = [1, 2, 3] - - df = DataFrame(columns=["A", "B"]) - df.loc[3] = [6, 7] - - exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=np.int64) - tm.assert_frame_equal(df, exp) - def test_series_partial_set(self): # partial set with new index # Regression from GH4825 @@ -352,6 +531,7 @@ def test_setitem_with_expansion_numeric_into_datetimeindex(self, key): ex_index = Index(list(orig.index) + [key], dtype=object, name=orig.index.name) ex_data = np.concatenate([orig.values, df.iloc[[0]].values], axis=0) expected = DataFrame(ex_data, index=ex_index, columns=orig.columns) + tm.assert_frame_equal(df, expected) def test_partial_set_invalid(self): @@ -369,162 +549,6 @@ def test_partial_set_invalid(self): tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"])) assert df.index.dtype == "object" - def test_partial_set_empty_frame(self): - - # partially set with an empty object - # frame - df = DataFrame() - - msg = "cannot set a frame with no defined columns" - - with pytest.raises(ValueError, match=msg): - df.loc[1] = 1 - - with pytest.raises(ValueError, match=msg): - df.loc[1] = Series([1], index=["foo"]) - - msg = "cannot set a frame with no defined index and a scalar" - with pytest.raises(ValueError, match=msg): - df.loc[:, 1] = 1 - - def test_partial_set_empty_frame2(self): - # these work as they don't really change - # anything but the index - # GH5632 - expected = DataFrame(columns=["foo"], index=Index([], dtype="object")) - - df = DataFrame(index=Index([], dtype="object")) - df["foo"] = Series([], dtype="object") - - tm.assert_frame_equal(df, expected) - - df = DataFrame() - df["foo"] = Series(df.index) - - tm.assert_frame_equal(df, expected) - - df = DataFrame() - df["foo"] = df.index - - tm.assert_frame_equal(df, expected) - - def test_partial_set_empty_frame3(self): - expected = DataFrame(columns=["foo"], index=Index([], dtype="int64")) - expected["foo"] = expected["foo"].astype("float64") - - df = DataFrame(index=Index([], dtype="int64")) - df["foo"] = [] - - tm.assert_frame_equal(df, expected) - - df = DataFrame(index=Index([], dtype="int64")) - df["foo"] = Series(np.arange(len(df)), dtype="float64") - - tm.assert_frame_equal(df, expected) - - def test_partial_set_empty_frame4(self): - df = DataFrame(index=Index([], dtype="int64")) - df["foo"] = range(len(df)) - - expected = DataFrame(columns=["foo"], index=Index([], dtype="int64")) - # range is int-dtype-like, so we get int64 dtype - expected["foo"] = expected["foo"].astype("int64") - tm.assert_frame_equal(df, expected) - - def test_partial_set_empty_frame5(self): - df = DataFrame() - tm.assert_index_equal(df.columns, Index([], dtype=object)) - df2 = DataFrame() - df2[1] = Series([1], index=["foo"]) - df.loc[:, 1] = Series([1], index=["foo"]) - tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1])) - tm.assert_frame_equal(df, df2) - - def test_partial_set_empty_frame_no_index(self): - # no index to start - expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0]) - - df = DataFrame(columns=["A", "B"]) - df[0] = Series(1, index=range(4)) - df.dtypes - str(df) - tm.assert_frame_equal(df, expected) - - df = DataFrame(columns=["A", "B"]) - df.loc[:, 0] = Series(1, index=range(4)) - df.dtypes - str(df) - tm.assert_frame_equal(df, expected) - - def test_partial_set_empty_frame_row(self): - # GH5720, GH5744 - # don't create rows when empty - expected = DataFrame(columns=["A", "B", "New"], index=Index([], dtype="int64")) - expected["A"] = expected["A"].astype("int64") - expected["B"] = expected["B"].astype("float64") - expected["New"] = expected["New"].astype("float64") - - df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) - y = df[df.A > 5] - y["New"] = np.nan - tm.assert_frame_equal(y, expected) - # tm.assert_frame_equal(y,expected) - - expected = DataFrame(columns=["a", "b", "c c", "d"]) - expected["d"] = expected["d"].astype("int64") - df = DataFrame(columns=["a", "b", "c c"]) - df["d"] = 3 - tm.assert_frame_equal(df, expected) - tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object)) - - # reindex columns is ok - df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) - y = df[df.A > 5] - result = y.reindex(columns=["A", "B", "C"]) - expected = DataFrame(columns=["A", "B", "C"], index=Index([], dtype="int64")) - expected["A"] = expected["A"].astype("int64") - expected["B"] = expected["B"].astype("float64") - expected["C"] = expected["C"].astype("float64") - tm.assert_frame_equal(result, expected) - - def test_partial_set_empty_frame_set_series(self): - # GH 5756 - # setting with empty Series - df = DataFrame(Series(dtype=object)) - expected = DataFrame({0: Series(dtype=object)}) - tm.assert_frame_equal(df, expected) - - df = DataFrame(Series(name="foo", dtype=object)) - expected = DataFrame({"foo": Series(dtype=object)}) - tm.assert_frame_equal(df, expected) - - def test_partial_set_empty_frame_empty_copy_assignment(self): - # GH 5932 - # copy on empty with assignment fails - df = DataFrame(index=[0]) - df = df.copy() - df["a"] = 0 - expected = DataFrame(0, index=[0], columns=["a"]) - tm.assert_frame_equal(df, expected) - - def test_partial_set_empty_frame_empty_consistencies(self): - # GH 6171 - # consistency on empty frames - df = DataFrame(columns=["x", "y"]) - df["x"] = [1, 2] - expected = DataFrame({"x": [1, 2], "y": [np.nan, np.nan]}) - tm.assert_frame_equal(df, expected, check_dtype=False) - - df = DataFrame(columns=["x", "y"]) - df["x"] = ["1", "2"] - expected = DataFrame({"x": ["1", "2"], "y": [np.nan, np.nan]}, dtype=object) - tm.assert_frame_equal(df, expected) - - df = DataFrame(columns=["x", "y"]) - df.loc[0, "x"] = 1 - expected = DataFrame({"x": [1], "y": [np.nan]}) - tm.assert_frame_equal(df, expected, check_dtype=False) - @pytest.mark.parametrize( "idx,labels,expected_idx", [ @@ -584,14 +608,14 @@ def test_loc_with_list_of_strings_representing_datetimes_missing_value( self, idx, labels ): # GH 11278 - s = Series(range(20), index=idx) + ser = Series(range(20), index=idx) df = DataFrame(range(20), index=idx) msg = r"not in index" with pytest.raises(KeyError, match=msg): - s.loc[labels] + ser.loc[labels] with pytest.raises(KeyError, match=msg): - s[labels] + ser[labels] with pytest.raises(KeyError, match=msg): df.loc[labels] @@ -628,37 +652,18 @@ def test_loc_with_list_of_strings_representing_datetimes_not_matched_type( self, idx, labels, msg ): # GH 11278 - s = Series(range(20), index=idx) + ser = Series(range(20), index=idx) df = DataFrame(range(20), index=idx) with pytest.raises(KeyError, match=msg): - s.loc[labels] + ser.loc[labels] with pytest.raises(KeyError, match=msg): - s[labels] + ser[labels] with pytest.raises(KeyError, match=msg): df.loc[labels] - def test_index_name_empty(self): - # GH 31368 - df = DataFrame({}, index=pd.RangeIndex(0, name="df_index")) - series = Series(1.23, index=pd.RangeIndex(4, name="series_index")) - - df["series"] = series - expected = DataFrame( - {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="df_index") - ) - - tm.assert_frame_equal(df, expected) - - # GH 36527 - df = DataFrame() - series = Series(1.23, index=pd.RangeIndex(4, name="series_index")) - df["series"] = series - expected = DataFrame( - {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="series_index") - ) - tm.assert_frame_equal(df, expected) +class TestStringSlicing: def test_slice_irregular_datetime_index_with_nan(self): # GH36953 index = pd.to_datetime(["2012-01-01", "2012-01-02", "2012-01-03", None])