Skip to content

Commit 1825fa1

Browse files
authored
TST: method-specific files for DataFrame assign, interpolate (#32110)
1 parent 8de4096 commit 1825fa1

File tree

4 files changed

+368
-356
lines changed

4 files changed

+368
-356
lines changed
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import pytest
2+
3+
from pandas import DataFrame
4+
import pandas._testing as tm
5+
6+
7+
class TestAssign:
    """Tests for ``DataFrame.assign``."""

    def test_assign(self):
        """Add or overwrite a column from a Series, a callable, or a list."""
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        original = df.copy()
        result = df.assign(C=df.B / df.A)
        expected = df.copy()
        expected["C"] = [4, 2.5, 2]
        tm.assert_frame_equal(result, expected)

        # lambda syntax
        result = df.assign(C=lambda x: x.B / x.A)
        tm.assert_frame_equal(result, expected)

        # original is unmodified
        tm.assert_frame_equal(df, original)

        # Non-Series array-like
        result = df.assign(C=[4, 2.5, 2])
        tm.assert_frame_equal(result, expected)
        # original is unmodified
        tm.assert_frame_equal(df, original)

        # Replacing "B" keeps its position: derive the expectation from the
        # previous one by dropping the old "B" and renaming "C" to "B".
        result = df.assign(B=df.B / df.A)
        expected = expected.drop("B", axis=1).rename(columns={"C": "B"})
        tm.assert_frame_equal(result, expected)

        # overwrite
        result = df.assign(A=df.A + df.B)
        expected = df.copy()
        expected["A"] = [5, 7, 9]
        tm.assert_frame_equal(result, expected)

        # lambda
        result = df.assign(A=lambda x: x.A + x.B)
        tm.assert_frame_equal(result, expected)

    def test_assign_multiple(self):
        """Several keywords of different kinds can be assigned in one call."""
        df = DataFrame([[1, 4], [2, 5], [3, 6]], columns=["A", "B"])
        result = df.assign(C=[7, 8, 9], D=df.A, E=lambda x: x.B)
        expected = DataFrame(
            [[1, 4, 7, 1, 4], [2, 5, 8, 2, 5], [3, 6, 9, 3, 6]], columns=list("ABCDE")
        )
        tm.assert_frame_equal(result, expected)

    def test_assign_order(self):
        """New columns are appended in keyword order."""
        # GH 9818
        df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
        result = df.assign(D=df.A + df.B, C=df.A - df.B)

        expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]], columns=list("ABDC"))
        tm.assert_frame_equal(result, expected)
        result = df.assign(C=df.A - df.B, D=df.A + df.B)

        expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], columns=list("ABCD"))

        tm.assert_frame_equal(result, expected)

    def test_assign_bad(self):
        """Invalid ``assign`` calls raise, and the message is checked."""
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

        # non-keyword argument
        msg = r"assign\(\) takes 1 positional argument but 2 were given"
        with pytest.raises(TypeError, match=msg):
            df.assign(lambda x: x.A)

        # ``df.C`` is evaluated while the arguments are being built, i.e.
        # before ``assign`` is called, and ``df`` has no column "C".
        msg = "'DataFrame' object has no attribute 'C'"
        with pytest.raises(AttributeError, match=msg):
            df.assign(C=df.A, D=df.A + df.C)

    def test_assign_dependent(self):
        """A callable can refer to a column created earlier in the same call."""
        df = DataFrame({"A": [1, 2], "B": [3, 4]})

        result = df.assign(C=df.A, D=lambda x: x["A"] + x["C"])
        expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD"))
        tm.assert_frame_equal(result, expected)

        result = df.assign(C=lambda df: df.A, D=lambda df: df["A"] + df["C"])
        expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD"))
        tm.assert_frame_equal(result, expected)
Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
import numpy as np
2+
import pytest
3+
4+
import pandas.util._test_decorators as td
5+
6+
from pandas import DataFrame, Series, date_range
7+
import pandas._testing as tm
8+
9+
10+
class TestDataFrameInterpolate:
    """Tests for ``DataFrame.interpolate``."""

    def test_interp_basic(self):
        """Default interpolation fills the float gaps; "C" and "D" are unchanged."""
        df = DataFrame(
            {
                "A": [1, 2, np.nan, 4],
                "B": [1, 4, 9, np.nan],
                "C": [1, 2, 3, 5],
                "D": list("abcd"),
            }
        )
        expected = DataFrame(
            {
                "A": [1.0, 2.0, 3.0, 4.0],
                "B": [1.0, 4.0, 9.0, 9.0],
                "C": [1, 2, 3, 5],
                "D": list("abcd"),
            }
        )
        result = df.interpolate()
        tm.assert_frame_equal(result, expected)

        # Same data with "C" as the index; rows are addressed by label.
        result = df.set_index("C").interpolate()
        expected = df.set_index("C")
        expected.loc[3, "A"] = 3
        expected.loc[5, "B"] = 9
        tm.assert_frame_equal(result, expected)

    def test_interp_bad_method(self):
        """An unknown ``method`` raises ValueError."""
        df = DataFrame(
            {
                "A": [1, 2, np.nan, 4],
                "B": [1, 4, 9, np.nan],
                "C": [1, 2, 3, 5],
                "D": list("abcd"),
            }
        )
        with pytest.raises(ValueError):
            df.interpolate(method="not_a_method")

    def test_interp_combo(self):
        """A column stays float unless ``downcast="infer"`` is requested."""
        df = DataFrame(
            {
                "A": [1.0, 2.0, np.nan, 4.0],
                "B": [1, 4, 9, np.nan],
                "C": [1, 2, 3, 5],
                "D": list("abcd"),
            }
        )

        result = df["A"].interpolate()
        expected = Series([1.0, 2.0, 3.0, 4.0], name="A")
        tm.assert_series_equal(result, expected)

        result = df["A"].interpolate(downcast="infer")
        expected = Series([1, 2, 3, 4], name="A")
        tm.assert_series_equal(result, expected)

    def test_interp_nan_idx(self):
        """``method="values"`` with NaN in the index raises NotImplementedError."""
        df = DataFrame({"A": [1, 2, np.nan, 4], "B": [np.nan, 2, 3, 4]})
        df = df.set_index("A")
        with pytest.raises(NotImplementedError):
            df.interpolate(method="values")

    @td.skip_if_no("scipy")
    def test_interp_various(self):
        """Check several scipy-backed methods against reference values."""
        df = DataFrame(
            {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]}
        )
        df = df.set_index("C")
        expected = df.copy()
        result = df.interpolate(method="polynomial", order=1)

        # The two gaps sit at index labels 3 and 13.  Set them with a single
        # ``.loc[row, col]`` call rather than chained ``expected.A.loc[...]``,
        # which only works when ``expected.A`` is a view of the frame.
        expected.loc[3, "A"] = 2.66666667
        expected.loc[13, "A"] = 5.76923076
        tm.assert_frame_equal(result, expected)

        result = df.interpolate(method="cubic")
        # GH #15662.
        expected.loc[3, "A"] = 2.81547781
        expected.loc[13, "A"] = 5.52964175
        tm.assert_frame_equal(result, expected)

        result = df.interpolate(method="nearest")
        expected.loc[3, "A"] = 2
        expected.loc[13, "A"] = 5
        tm.assert_frame_equal(result, expected, check_dtype=False)

        result = df.interpolate(method="quadratic")
        expected.loc[3, "A"] = 2.82150771
        expected.loc[13, "A"] = 6.12648668
        tm.assert_frame_equal(result, expected)

        result = df.interpolate(method="slinear")
        expected.loc[3, "A"] = 2.66666667
        expected.loc[13, "A"] = 5.76923077
        tm.assert_frame_equal(result, expected)

        result = df.interpolate(method="zero")
        expected.loc[3, "A"] = 2.0
        expected.loc[13, "A"] = 5
        tm.assert_frame_equal(result, expected, check_dtype=False)

    @td.skip_if_no("scipy")
    def test_interp_alt_scipy(self):
        """Check the barycentric, krogh and pchip methods."""
        df = DataFrame(
            {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]}
        )
        result = df.interpolate(method="barycentric")
        expected = df.copy()
        expected.loc[2, "A"] = 3
        expected.loc[5, "A"] = 6
        tm.assert_frame_equal(result, expected)

        result = df.interpolate(method="barycentric", downcast="infer")
        tm.assert_frame_equal(result, expected.astype(np.int64))

        result = df.interpolate(method="krogh")
        expectedk = df.copy()
        expectedk["A"] = expected["A"]
        tm.assert_frame_equal(result, expectedk)

        result = df.interpolate(method="pchip")
        expected.loc[2, "A"] = 3
        expected.loc[5, "A"] = 6.0

        tm.assert_frame_equal(result, expected)

    def test_interp_rowwise(self):
        """Interpolate along ``axis=1``; ``axis=0`` matches the default."""
        df = DataFrame(
            {
                0: [1, 2, np.nan, 4],
                1: [2, 3, 4, np.nan],
                2: [np.nan, 4, 5, 6],
                3: [4, np.nan, 6, 7],
                4: [1, 2, 3, 4],
            }
        )
        result = df.interpolate(axis=1)
        expected = df.copy()
        expected.loc[3, 1] = 5
        expected.loc[0, 2] = 3
        expected.loc[1, 3] = 3
        # Column 4 is built from ints; the expected result has it as float64.
        expected[4] = expected[4].astype(np.float64)
        tm.assert_frame_equal(result, expected)

        result = df.interpolate(axis=1, method="values")
        tm.assert_frame_equal(result, expected)

        result = df.interpolate(axis=0)
        expected = df.interpolate()
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "axis_name, axis_number",
        [
            pytest.param("rows", 0, id="rows_0"),
            pytest.param("index", 0, id="index_0"),
            pytest.param("columns", 1, id="columns_1"),
        ],
    )
    def test_interp_axis_names(self, axis_name, axis_number):
        """An axis given by name behaves like the matching axis number."""
        # GH 29132: test axis names
        data = {0: [0, np.nan, 6], 1: [1, np.nan, 7], 2: [2, 5, 8]}

        df = DataFrame(data, dtype=np.float64)
        result = df.interpolate(axis=axis_name, method="linear")
        expected = df.interpolate(axis=axis_number, method="linear")
        tm.assert_frame_equal(result, expected)

    def test_rowwise_alt(self):
        """Linear interpolation down the rows fills one- and two-row gaps."""
        df = DataFrame(
            {
                0: [0, 0.5, 1.0, np.nan, 4, 8, np.nan, np.nan, 64],
                1: [1, 2, 3, 4, 3, 2, 1, 0, -1],
            }
        )
        result = df.interpolate(axis=0)

        # Column 0: the single gap is the midpoint of 1.0 and 4; the double
        # gap splits the step from 8 to 64 into thirds.  Column 1 has no
        # missing values and is expected back unchanged.
        step = (64 - 8) / 3
        expected = DataFrame(
            {
                0: [0, 0.5, 1.0, 2.5, 4, 8, 8 + step, 8 + 2 * step, 64],
                1: [1, 2, 3, 4, 3, 2, 1, 0, -1],
            }
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "check_scipy", [False, pytest.param(True, marks=td.skip_if_no("scipy"))]
    )
    def test_interp_leading_nans(self, check_scipy):
        """Leading NaNs stay NaN; only the interior gap in "B" is filled."""
        df = DataFrame(
            {"A": [np.nan, np.nan, 0.5, 0.25, 0], "B": [np.nan, -3, -3.5, np.nan, -4]}
        )
        result = df.interpolate()
        expected = df.copy()
        # Single-step ``.loc`` instead of chained ``expected["B"].loc[3]``,
        # which only works when ``expected["B"]`` is a view of the frame.
        expected.loc[3, "B"] = -3.75
        tm.assert_frame_equal(result, expected)

        if check_scipy:
            result = df.interpolate(method="polynomial", order=1)
            tm.assert_frame_equal(result, expected)

    def test_interp_raise_on_only_mixed(self):
        """Row-wise interpolation of a frame with a string column raises."""
        df = DataFrame(
            {
                "A": [1, 2, np.nan, 4],
                "B": ["a", "b", "c", "d"],
                "C": [np.nan, 2, 5, 7],
                "D": [np.nan, np.nan, 9, 9],
                "E": [1, 2, 3, 4],
            }
        )
        with pytest.raises(TypeError):
            df.interpolate(axis=1)

    def test_interp_raise_on_all_object_dtype(self):
        """An all-object frame raises TypeError with an explanatory message."""
        # GH 22985
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, dtype="object")
        msg = (
            "Cannot interpolate with all object-dtype columns "
            "in the DataFrame. Try setting at least one "
            "column to a numeric dtype."
        )
        with pytest.raises(TypeError, match=msg):
            df.interpolate()

    def test_interp_inplace(self):
        """In-place interpolation of a selected column shows up in the frame."""
        df = DataFrame({"a": [1.0, 2.0, np.nan, 4.0]})
        expected = DataFrame({"a": [1.0, 2.0, 3.0, 4.0]})
        result = df.copy()
        # The assertion below is on the parent frame, so this expects the
        # in-place change on ``result["a"]`` to be visible through ``result``.
        result["a"].interpolate(inplace=True)
        tm.assert_frame_equal(result, expected)

        result = df.copy()
        result["a"].interpolate(inplace=True, downcast="infer")
        tm.assert_frame_equal(result, expected.astype("int64"))

    def test_interp_inplace_row(self):
        """``inplace=True`` along ``axis=1`` matches the copying call."""
        # GH 10395
        result = DataFrame(
            {"a": [1.0, 2.0, 3.0, 4.0], "b": [np.nan, 2.0, 3.0, 4.0], "c": [3, 2, 2, 2]}
        )
        expected = result.interpolate(method="linear", axis=1, inplace=False)
        result.interpolate(method="linear", axis=1, inplace=True)
        tm.assert_frame_equal(result, expected)

    def test_interp_ignore_all_good(self):
        """Columns without missing values keep their values and dtype."""
        df = DataFrame(
            {
                "A": [1, 2, np.nan, 4],
                "B": [1, 2, 3, 4],
                "C": [1.0, 2.0, np.nan, 4.0],
                "D": [1.0, 2.0, 3.0, 4.0],
            }
        )
        expected = DataFrame(
            {
                "A": np.array([1, 2, 3, 4], dtype="float64"),
                "B": np.array([1, 2, 3, 4], dtype="int64"),
                "C": np.array([1.0, 2.0, 3, 4.0], dtype="float64"),
                "D": np.array([1.0, 2.0, 3.0, 4.0], dtype="float64"),
            }
        )

        result = df.interpolate(downcast=None)
        tm.assert_frame_equal(result, expected)

        # all good
        result = df[["B", "D"]].interpolate(downcast=None)
        tm.assert_frame_equal(result, df[["B", "D"]])

    @pytest.mark.parametrize("axis", [0, 1])
    def test_interp_time_inplace_axis(self, axis):
        """In-place and copying ``method="time"`` agree along either axis."""
        # GH 9687
        periods = 5
        idx = date_range(start="2014-01-01", periods=periods)
        data = np.random.rand(periods, periods)
        data[data < 0.5] = np.nan
        # Both axes carry the same DatetimeIndex, so ``method="time"`` is
        # usable whichever axis is interpolated along.
        expected = DataFrame(index=idx, columns=idx, data=data)

        # Use the parametrized axis; a hard-coded 0 would make both
        # parametrizations run the identical code path.
        result = expected.interpolate(axis=axis, method="time")
        expected.interpolate(axis=axis, method="time", inplace=True)
        tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)