diff --git a/pandas/tests/frame/methods/test_assign.py b/pandas/tests/frame/methods/test_assign.py new file mode 100644 index 0000000000000..63b9f031de188 --- /dev/null +++ b/pandas/tests/frame/methods/test_assign.py @@ -0,0 +1,82 @@ +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +class TestAssign: + def test_assign(self): + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + original = df.copy() + result = df.assign(C=df.B / df.A) + expected = df.copy() + expected["C"] = [4, 2.5, 2] + tm.assert_frame_equal(result, expected) + + # lambda syntax + result = df.assign(C=lambda x: x.B / x.A) + tm.assert_frame_equal(result, expected) + + # original is unmodified + tm.assert_frame_equal(df, original) + + # Non-Series array-like + result = df.assign(C=[4, 2.5, 2]) + tm.assert_frame_equal(result, expected) + # original is unmodified + tm.assert_frame_equal(df, original) + + result = df.assign(B=df.B / df.A) + expected = expected.drop("B", axis=1).rename(columns={"C": "B"}) + tm.assert_frame_equal(result, expected) + + # overwrite + result = df.assign(A=df.A + df.B) + expected = df.copy() + expected["A"] = [5, 7, 9] + tm.assert_frame_equal(result, expected) + + # lambda + result = df.assign(A=lambda x: x.A + x.B) + tm.assert_frame_equal(result, expected) + + def test_assign_multiple(self): + df = DataFrame([[1, 4], [2, 5], [3, 6]], columns=["A", "B"]) + result = df.assign(C=[7, 8, 9], D=df.A, E=lambda x: x.B) + expected = DataFrame( + [[1, 4, 7, 1, 4], [2, 5, 8, 2, 5], [3, 6, 9, 3, 6]], columns=list("ABCDE") + ) + tm.assert_frame_equal(result, expected) + + def test_assign_order(self): + # GH 9818 + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + result = df.assign(D=df.A + df.B, C=df.A - df.B) + + expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]], columns=list("ABDC")) + tm.assert_frame_equal(result, expected) + result = df.assign(C=df.A - df.B, D=df.A + df.B) + + expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], columns=list("ABCD")) + + tm.assert_frame_equal(result, expected) + + def test_assign_bad(self): + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + + # non-keyword argument + with pytest.raises(TypeError): + df.assign(lambda x: x.A) + with pytest.raises(AttributeError): + df.assign(C=df.A, D=df.A + df.C) + + def test_assign_dependent(self): + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + + result = df.assign(C=df.A, D=lambda x: x["A"] + x["C"]) + expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD")) + tm.assert_frame_equal(result, expected) + + result = df.assign(C=lambda df: df.A, D=lambda df: df["A"] + df["C"]) + expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD")) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py new file mode 100644 index 0000000000000..3b8fa0dfbb603 --- /dev/null +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -0,0 +1,286 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame, Series, date_range +import pandas._testing as tm + + +class TestDataFrameInterpolate: + def test_interp_basic(self): + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": [1, 4, 9, np.nan], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + expected = DataFrame( + { + "A": [1.0, 2.0, 3.0, 4.0], + "B": [1.0, 4.0, 9.0, 9.0], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + result = df.interpolate() + tm.assert_frame_equal(result, expected) + + result = df.set_index("C").interpolate() + expected = df.set_index("C") + expected.loc[3, "A"] = 3 + expected.loc[5, "B"] = 9 + tm.assert_frame_equal(result, expected) + + def test_interp_bad_method(self): + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": [1, 4, 9, np.nan], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + with pytest.raises(ValueError): + df.interpolate(method="not_a_method") + + def test_interp_combo(self): + df = DataFrame( + { + "A": [1.0, 2.0, np.nan, 4.0], + "B": [1, 4, 9, np.nan], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + + result = df["A"].interpolate() + expected = Series([1.0, 2.0, 3.0, 4.0], name="A") + tm.assert_series_equal(result, expected) + + result = df["A"].interpolate(downcast="infer") + expected = Series([1, 2, 3, 4], name="A") + tm.assert_series_equal(result, expected) + + def test_interp_nan_idx(self): + df = DataFrame({"A": [1, 2, np.nan, 4], "B": [np.nan, 2, 3, 4]}) + df = df.set_index("A") + with pytest.raises(NotImplementedError): + df.interpolate(method="values") + + @td.skip_if_no_scipy + def test_interp_various(self): + df = DataFrame( + {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]} + ) + df = df.set_index("C") + expected = df.copy() + result = df.interpolate(method="polynomial", order=1) + + expected.A.loc[3] = 2.66666667 + expected.A.loc[13] = 5.76923076 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="cubic") + # GH #15662. + expected.A.loc[3] = 2.81547781 + expected.A.loc[13] = 5.52964175 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="nearest") + expected.A.loc[3] = 2 + expected.A.loc[13] = 5 + tm.assert_frame_equal(result, expected, check_dtype=False) + + result = df.interpolate(method="quadratic") + expected.A.loc[3] = 2.82150771 + expected.A.loc[13] = 6.12648668 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="slinear") + expected.A.loc[3] = 2.66666667 + expected.A.loc[13] = 5.76923077 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="zero") + expected.A.loc[3] = 2.0 + expected.A.loc[13] = 5 + tm.assert_frame_equal(result, expected, check_dtype=False) + + @td.skip_if_no_scipy + def test_interp_alt_scipy(self): + df = DataFrame( + {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]} + ) + result = df.interpolate(method="barycentric") + expected = df.copy() + expected.loc[2, "A"] = 3 + expected.loc[5, "A"] = 6 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="barycentric", downcast="infer") + tm.assert_frame_equal(result, expected.astype(np.int64)) + + result = df.interpolate(method="krogh") + expectedk = df.copy() + expectedk["A"] = expected["A"] + tm.assert_frame_equal(result, expectedk) + + result = df.interpolate(method="pchip") + expected.loc[2, "A"] = 3 + expected.loc[5, "A"] = 6.0 + + tm.assert_frame_equal(result, expected) + + def test_interp_rowwise(self): + df = DataFrame( + { + 0: [1, 2, np.nan, 4], + 1: [2, 3, 4, np.nan], + 2: [np.nan, 4, 5, 6], + 3: [4, np.nan, 6, 7], + 4: [1, 2, 3, 4], + } + ) + result = df.interpolate(axis=1) + expected = df.copy() + expected.loc[3, 1] = 5 + expected.loc[0, 2] = 3 + expected.loc[1, 3] = 3 + expected[4] = expected[4].astype(np.float64) + tm.assert_frame_equal(result, expected) + + result = df.interpolate(axis=1, method="values") + tm.assert_frame_equal(result, expected) + + result = df.interpolate(axis=0) + expected = df.interpolate() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "axis_name, axis_number", + [ + pytest.param("rows", 0, id="rows_0"), + pytest.param("index", 0, id="index_0"), + pytest.param("columns", 1, id="columns_1"), + ], + ) + def test_interp_axis_names(self, axis_name, axis_number): + # GH 29132: test axis names + data = {0: [0, np.nan, 6], 1: [1, np.nan, 7], 2: [2, 5, 8]} + + df = DataFrame(data, dtype=np.float64) + result = df.interpolate(axis=axis_name, method="linear") + expected = df.interpolate(axis=axis_number, method="linear") + tm.assert_frame_equal(result, expected) + + def test_rowwise_alt(self): + df = DataFrame( + { + 0: [0, 0.5, 1.0, np.nan, 4, 8, np.nan, np.nan, 64], + 1: [1, 2, 3, 4, 3, 2, 1, 0, -1], + } + ) + df.interpolate(axis=0) + # TODO: assert something? + + @pytest.mark.parametrize( + "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] + ) + def test_interp_leading_nans(self, check_scipy): + df = DataFrame( + {"A": [np.nan, np.nan, 0.5, 0.25, 0], "B": [np.nan, -3, -3.5, np.nan, -4]} + ) + result = df.interpolate() + expected = df.copy() + expected["B"].loc[3] = -3.75 + tm.assert_frame_equal(result, expected) + + if check_scipy: + result = df.interpolate(method="polynomial", order=1) + tm.assert_frame_equal(result, expected) + + def test_interp_raise_on_only_mixed(self): + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": ["a", "b", "c", "d"], + "C": [np.nan, 2, 5, 7], + "D": [np.nan, np.nan, 9, 9], + "E": [1, 2, 3, 4], + } + ) + with pytest.raises(TypeError): + df.interpolate(axis=1) + + def test_interp_raise_on_all_object_dtype(self): + # GH 22985 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, dtype="object") + msg = ( + "Cannot interpolate with all object-dtype columns " + "in the DataFrame. Try setting at least one " + "column to a numeric dtype." + ) + with pytest.raises(TypeError, match=msg): + df.interpolate() + + def test_interp_inplace(self): + df = DataFrame({"a": [1.0, 2.0, np.nan, 4.0]}) + expected = DataFrame({"a": [1.0, 2.0, 3.0, 4.0]}) + result = df.copy() + result["a"].interpolate(inplace=True) + tm.assert_frame_equal(result, expected) + + result = df.copy() + result["a"].interpolate(inplace=True, downcast="infer") + tm.assert_frame_equal(result, expected.astype("int64")) + + def test_interp_inplace_row(self): + # GH 10395 + result = DataFrame( + {"a": [1.0, 2.0, 3.0, 4.0], "b": [np.nan, 2.0, 3.0, 4.0], "c": [3, 2, 2, 2]} + ) + expected = result.interpolate(method="linear", axis=1, inplace=False) + result.interpolate(method="linear", axis=1, inplace=True) + tm.assert_frame_equal(result, expected) + + def test_interp_ignore_all_good(self): + # GH + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": [1, 2, 3, 4], + "C": [1.0, 2.0, np.nan, 4.0], + "D": [1.0, 2.0, 3.0, 4.0], + } + ) + expected = DataFrame( + { + "A": np.array([1, 2, 3, 4], dtype="float64"), + "B": np.array([1, 2, 3, 4], dtype="int64"), + "C": np.array([1.0, 2.0, 3, 4.0], dtype="float64"), + "D": np.array([1.0, 2.0, 3.0, 4.0], dtype="float64"), + } + ) + + result = df.interpolate(downcast=None) + tm.assert_frame_equal(result, expected) + + # all good + result = df[["B", "D"]].interpolate(downcast=None) + tm.assert_frame_equal(result, df[["B", "D"]]) + + @pytest.mark.parametrize("axis", [0, 1]) + def test_interp_time_inplace_axis(self, axis): + # GH 9687 + periods = 5 + idx = date_range(start="2014-01-01", periods=periods) + data = np.random.rand(periods, periods) + data[data < 0.5] = np.nan + expected = DataFrame(index=idx, columns=idx, data=data) + + result = expected.interpolate(axis=0, method="time") + expected.interpolate(axis=0, method="time", inplace=True) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index ae0516dd29a1f..196df8ba00476 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -4,8 +4,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas import Categorical, DataFrame, Series, Timestamp, date_range import pandas._testing as tm @@ -705,281 +703,3 @@ def test_fill_value_when_combine_const(self): exp = df.fillna(0).add(2) res = df.add(2, fill_value=0) tm.assert_frame_equal(res, exp) - - -class TestDataFrameInterpolate: - def test_interp_basic(self): - df = DataFrame( - { - "A": [1, 2, np.nan, 4], - "B": [1, 4, 9, np.nan], - "C": [1, 2, 3, 5], - "D": list("abcd"), - } - ) - expected = DataFrame( - { - "A": [1.0, 2.0, 3.0, 4.0], - "B": [1.0, 4.0, 9.0, 9.0], - "C": [1, 2, 3, 5], - "D": list("abcd"), - } - ) - result = df.interpolate() - tm.assert_frame_equal(result, expected) - - result = df.set_index("C").interpolate() - expected = df.set_index("C") - expected.loc[3, "A"] = 3 - expected.loc[5, "B"] = 9 - tm.assert_frame_equal(result, expected) - - def test_interp_bad_method(self): - df = DataFrame( - { - "A": [1, 2, np.nan, 4], - "B": [1, 4, 9, np.nan], - "C": [1, 2, 3, 5], - "D": list("abcd"), - } - ) - with pytest.raises(ValueError): - df.interpolate(method="not_a_method") - - def test_interp_combo(self): - df = DataFrame( - { - "A": [1.0, 2.0, np.nan, 4.0], - "B": [1, 4, 9, np.nan], - "C": [1, 2, 3, 5], - "D": list("abcd"), - } - ) - - result = df["A"].interpolate() - expected = Series([1.0, 2.0, 3.0, 4.0], name="A") - tm.assert_series_equal(result, expected) - - result = df["A"].interpolate(downcast="infer") - expected = Series([1, 2, 3, 4], name="A") - tm.assert_series_equal(result, expected) - - def test_interp_nan_idx(self): - df = DataFrame({"A": [1, 2, np.nan, 4], "B": [np.nan, 2, 3, 4]}) - df = df.set_index("A") - with pytest.raises(NotImplementedError): - df.interpolate(method="values") - - @td.skip_if_no_scipy - def test_interp_various(self): - df = DataFrame( - {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]} - ) - df = df.set_index("C") - expected = df.copy() - result = df.interpolate(method="polynomial", order=1) - - expected.A.loc[3] = 2.66666667 - expected.A.loc[13] = 5.76923076 - tm.assert_frame_equal(result, expected) - - result = df.interpolate(method="cubic") - # GH #15662. - expected.A.loc[3] = 2.81547781 - expected.A.loc[13] = 5.52964175 - tm.assert_frame_equal(result, expected) - - result = df.interpolate(method="nearest") - expected.A.loc[3] = 2 - expected.A.loc[13] = 5 - tm.assert_frame_equal(result, expected, check_dtype=False) - - result = df.interpolate(method="quadratic") - expected.A.loc[3] = 2.82150771 - expected.A.loc[13] = 6.12648668 - tm.assert_frame_equal(result, expected) - - result = df.interpolate(method="slinear") - expected.A.loc[3] = 2.66666667 - expected.A.loc[13] = 5.76923077 - tm.assert_frame_equal(result, expected) - - result = df.interpolate(method="zero") - expected.A.loc[3] = 2.0 - expected.A.loc[13] = 5 - tm.assert_frame_equal(result, expected, check_dtype=False) - - @td.skip_if_no_scipy - def test_interp_alt_scipy(self): - df = DataFrame( - {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]} - ) - result = df.interpolate(method="barycentric") - expected = df.copy() - expected.loc[2, "A"] = 3 - expected.loc[5, "A"] = 6 - tm.assert_frame_equal(result, expected) - - result = df.interpolate(method="barycentric", downcast="infer") - tm.assert_frame_equal(result, expected.astype(np.int64)) - - result = df.interpolate(method="krogh") - expectedk = df.copy() - expectedk["A"] = expected["A"] - tm.assert_frame_equal(result, expectedk) - - result = df.interpolate(method="pchip") - expected.loc[2, "A"] = 3 - expected.loc[5, "A"] = 6.0 - - tm.assert_frame_equal(result, expected) - - def test_interp_rowwise(self): - df = DataFrame( - { - 0: [1, 2, np.nan, 4], - 1: [2, 3, 4, np.nan], - 2: [np.nan, 4, 5, 6], - 3: [4, np.nan, 6, 7], - 4: [1, 2, 3, 4], - } - ) - result = df.interpolate(axis=1) - expected = df.copy() - expected.loc[3, 1] = 5 - expected.loc[0, 2] = 3 - expected.loc[1, 3] = 3 - expected[4] = expected[4].astype(np.float64) - tm.assert_frame_equal(result, expected) - - result = df.interpolate(axis=1, method="values") - tm.assert_frame_equal(result, expected) - - result = df.interpolate(axis=0) - expected = df.interpolate() - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "axis_name, axis_number", - [ - pytest.param("rows", 0, id="rows_0"), - pytest.param("index", 0, id="index_0"), - pytest.param("columns", 1, id="columns_1"), - ], - ) - def test_interp_axis_names(self, axis_name, axis_number): - # GH 29132: test axis names - data = {0: [0, np.nan, 6], 1: [1, np.nan, 7], 2: [2, 5, 8]} - - df = DataFrame(data, dtype=np.float64) - result = df.interpolate(axis=axis_name, method="linear") - expected = df.interpolate(axis=axis_number, method="linear") - tm.assert_frame_equal(result, expected) - - def test_rowwise_alt(self): - df = DataFrame( - { - 0: [0, 0.5, 1.0, np.nan, 4, 8, np.nan, np.nan, 64], - 1: [1, 2, 3, 4, 3, 2, 1, 0, -1], - } - ) - df.interpolate(axis=0) - - @pytest.mark.parametrize( - "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] - ) - def test_interp_leading_nans(self, check_scipy): - df = DataFrame( - {"A": [np.nan, np.nan, 0.5, 0.25, 0], "B": [np.nan, -3, -3.5, np.nan, -4]} - ) - result = df.interpolate() - expected = df.copy() - expected["B"].loc[3] = -3.75 - tm.assert_frame_equal(result, expected) - - if check_scipy: - result = df.interpolate(method="polynomial", order=1) - tm.assert_frame_equal(result, expected) - - def test_interp_raise_on_only_mixed(self): - df = DataFrame( - { - "A": [1, 2, np.nan, 4], - "B": ["a", "b", "c", "d"], - "C": [np.nan, 2, 5, 7], - "D": [np.nan, np.nan, 9, 9], - "E": [1, 2, 3, 4], - } - ) - with pytest.raises(TypeError): - df.interpolate(axis=1) - - def test_interp_raise_on_all_object_dtype(self): - # GH 22985 - df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, dtype="object") - msg = ( - "Cannot interpolate with all object-dtype columns " - "in the DataFrame. Try setting at least one " - "column to a numeric dtype." - ) - with pytest.raises(TypeError, match=msg): - df.interpolate() - - def test_interp_inplace(self): - df = DataFrame({"a": [1.0, 2.0, np.nan, 4.0]}) - expected = DataFrame({"a": [1.0, 2.0, 3.0, 4.0]}) - result = df.copy() - result["a"].interpolate(inplace=True) - tm.assert_frame_equal(result, expected) - - result = df.copy() - result["a"].interpolate(inplace=True, downcast="infer") - tm.assert_frame_equal(result, expected.astype("int64")) - - def test_interp_inplace_row(self): - # GH 10395 - result = DataFrame( - {"a": [1.0, 2.0, 3.0, 4.0], "b": [np.nan, 2.0, 3.0, 4.0], "c": [3, 2, 2, 2]} - ) - expected = result.interpolate(method="linear", axis=1, inplace=False) - result.interpolate(method="linear", axis=1, inplace=True) - tm.assert_frame_equal(result, expected) - - def test_interp_ignore_all_good(self): - # GH - df = DataFrame( - { - "A": [1, 2, np.nan, 4], - "B": [1, 2, 3, 4], - "C": [1.0, 2.0, np.nan, 4.0], - "D": [1.0, 2.0, 3.0, 4.0], - } - ) - expected = DataFrame( - { - "A": np.array([1, 2, 3, 4], dtype="float64"), - "B": np.array([1, 2, 3, 4], dtype="int64"), - "C": np.array([1.0, 2.0, 3, 4.0], dtype="float64"), - "D": np.array([1.0, 2.0, 3.0, 4.0], dtype="float64"), - } - ) - - result = df.interpolate(downcast=None) - tm.assert_frame_equal(result, expected) - - # all good - result = df[["B", "D"]].interpolate(downcast=None) - tm.assert_frame_equal(result, df[["B", "D"]]) - - @pytest.mark.parametrize("axis", [0, 1]) - def test_interp_time_inplace_axis(self, axis): - # GH 9687 - periods = 5 - idx = pd.date_range(start="2014-01-01", periods=periods) - data = np.random.rand(periods, periods) - data[data < 0.5] = np.nan - expected = pd.DataFrame(index=idx, columns=idx, data=data) - - result = expected.interpolate(axis=0, method="time") - expected.interpolate(axis=0, method="time", inplace=True) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 8bc2aa214e035..33f71602f4713 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -10,82 +10,6 @@ class TestDataFrameMutateColumns: - def test_assign(self): - df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - original = df.copy() - result = df.assign(C=df.B / df.A) - expected = df.copy() - expected["C"] = [4, 2.5, 2] - tm.assert_frame_equal(result, expected) - - # lambda syntax - result = df.assign(C=lambda x: x.B / x.A) - tm.assert_frame_equal(result, expected) - - # original is unmodified - tm.assert_frame_equal(df, original) - - # Non-Series array-like - result = df.assign(C=[4, 2.5, 2]) - tm.assert_frame_equal(result, expected) - # original is unmodified - tm.assert_frame_equal(df, original) - - result = df.assign(B=df.B / df.A) - expected = expected.drop("B", axis=1).rename(columns={"C": "B"}) - tm.assert_frame_equal(result, expected) - - # overwrite - result = df.assign(A=df.A + df.B) - expected = df.copy() - expected["A"] = [5, 7, 9] - tm.assert_frame_equal(result, expected) - - # lambda - result = df.assign(A=lambda x: x.A + x.B) - tm.assert_frame_equal(result, expected) - - def test_assign_multiple(self): - df = DataFrame([[1, 4], [2, 5], [3, 6]], columns=["A", "B"]) - result = df.assign(C=[7, 8, 9], D=df.A, E=lambda x: x.B) - expected = DataFrame( - [[1, 4, 7, 1, 4], [2, 5, 8, 2, 5], [3, 6, 9, 3, 6]], columns=list("ABCDE") - ) - tm.assert_frame_equal(result, expected) - - def test_assign_order(self): - # GH 9818 - df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) - result = df.assign(D=df.A + df.B, C=df.A - df.B) - - expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]], columns=list("ABDC")) - tm.assert_frame_equal(result, expected) - result = df.assign(C=df.A - df.B, D=df.A + df.B) - - expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], columns=list("ABCD")) - - tm.assert_frame_equal(result, expected) - - def test_assign_bad(self): - df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - - # non-keyword argument - with pytest.raises(TypeError): - df.assign(lambda x: x.A) - with pytest.raises(AttributeError): - df.assign(C=df.A, D=df.A + df.C) - - def test_assign_dependent(self): - df = DataFrame({"A": [1, 2], "B": [3, 4]}) - - result = df.assign(C=df.A, D=lambda x: x["A"] + x["C"]) - expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD")) - tm.assert_frame_equal(result, expected) - - result = df.assign(C=lambda df: df.A, D=lambda df: df["A"] + df["C"]) - expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD")) - tm.assert_frame_equal(result, expected) - def test_insert_error_msmgs(self): # GH 7432