Skip to content

TST: method-specific files for DataFrame assign, interpolate #32110

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 22, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions pandas/tests/frame/methods/test_assign.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import pytest

from pandas import DataFrame
import pandas._testing as tm


class TestAssign:
def test_assign(self):
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
original = df.copy()
result = df.assign(C=df.B / df.A)
expected = df.copy()
expected["C"] = [4, 2.5, 2]
tm.assert_frame_equal(result, expected)

# lambda syntax
result = df.assign(C=lambda x: x.B / x.A)
tm.assert_frame_equal(result, expected)

# original is unmodified
tm.assert_frame_equal(df, original)

# Non-Series array-like
result = df.assign(C=[4, 2.5, 2])
tm.assert_frame_equal(result, expected)
# original is unmodified
tm.assert_frame_equal(df, original)

result = df.assign(B=df.B / df.A)
expected = expected.drop("B", axis=1).rename(columns={"C": "B"})
tm.assert_frame_equal(result, expected)

# overwrite
result = df.assign(A=df.A + df.B)
expected = df.copy()
expected["A"] = [5, 7, 9]
tm.assert_frame_equal(result, expected)

# lambda
result = df.assign(A=lambda x: x.A + x.B)
tm.assert_frame_equal(result, expected)

def test_assign_multiple(self):
df = DataFrame([[1, 4], [2, 5], [3, 6]], columns=["A", "B"])
result = df.assign(C=[7, 8, 9], D=df.A, E=lambda x: x.B)
expected = DataFrame(
[[1, 4, 7, 1, 4], [2, 5, 8, 2, 5], [3, 6, 9, 3, 6]], columns=list("ABCDE")
)
tm.assert_frame_equal(result, expected)

def test_assign_order(self):
# GH 9818
df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
result = df.assign(D=df.A + df.B, C=df.A - df.B)

expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]], columns=list("ABDC"))
tm.assert_frame_equal(result, expected)
result = df.assign(C=df.A - df.B, D=df.A + df.B)

expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], columns=list("ABCD"))

tm.assert_frame_equal(result, expected)

def test_assign_bad(self):
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

# non-keyword argument
with pytest.raises(TypeError):
df.assign(lambda x: x.A)
with pytest.raises(AttributeError):
df.assign(C=df.A, D=df.A + df.C)

def test_assign_dependent(self):
df = DataFrame({"A": [1, 2], "B": [3, 4]})

result = df.assign(C=df.A, D=lambda x: x["A"] + x["C"])
expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD"))
tm.assert_frame_equal(result, expected)

result = df.assign(C=lambda df: df.A, D=lambda df: df["A"] + df["C"])
expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD"))
tm.assert_frame_equal(result, expected)
286 changes: 286 additions & 0 deletions pandas/tests/frame/methods/test_interpolate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import DataFrame, Series, date_range
import pandas._testing as tm


class TestDataFrameInterpolate:
def test_interp_basic(self):
df = DataFrame(
{
"A": [1, 2, np.nan, 4],
"B": [1, 4, 9, np.nan],
"C": [1, 2, 3, 5],
"D": list("abcd"),
}
)
expected = DataFrame(
{
"A": [1.0, 2.0, 3.0, 4.0],
"B": [1.0, 4.0, 9.0, 9.0],
"C": [1, 2, 3, 5],
"D": list("abcd"),
}
)
result = df.interpolate()
tm.assert_frame_equal(result, expected)

result = df.set_index("C").interpolate()
expected = df.set_index("C")
expected.loc[3, "A"] = 3
expected.loc[5, "B"] = 9
tm.assert_frame_equal(result, expected)

def test_interp_bad_method(self):
df = DataFrame(
{
"A": [1, 2, np.nan, 4],
"B": [1, 4, 9, np.nan],
"C": [1, 2, 3, 5],
"D": list("abcd"),
}
)
with pytest.raises(ValueError):
df.interpolate(method="not_a_method")

def test_interp_combo(self):
df = DataFrame(
{
"A": [1.0, 2.0, np.nan, 4.0],
"B": [1, 4, 9, np.nan],
"C": [1, 2, 3, 5],
"D": list("abcd"),
}
)

result = df["A"].interpolate()
expected = Series([1.0, 2.0, 3.0, 4.0], name="A")
tm.assert_series_equal(result, expected)

result = df["A"].interpolate(downcast="infer")
expected = Series([1, 2, 3, 4], name="A")
tm.assert_series_equal(result, expected)

def test_interp_nan_idx(self):
df = DataFrame({"A": [1, 2, np.nan, 4], "B": [np.nan, 2, 3, 4]})
df = df.set_index("A")
with pytest.raises(NotImplementedError):
df.interpolate(method="values")

@td.skip_if_no_scipy
def test_interp_various(self):
df = DataFrame(
{"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]}
)
df = df.set_index("C")
expected = df.copy()
result = df.interpolate(method="polynomial", order=1)

expected.A.loc[3] = 2.66666667
expected.A.loc[13] = 5.76923076
tm.assert_frame_equal(result, expected)

result = df.interpolate(method="cubic")
# GH #15662.
expected.A.loc[3] = 2.81547781
expected.A.loc[13] = 5.52964175
tm.assert_frame_equal(result, expected)

result = df.interpolate(method="nearest")
expected.A.loc[3] = 2
expected.A.loc[13] = 5
tm.assert_frame_equal(result, expected, check_dtype=False)

result = df.interpolate(method="quadratic")
expected.A.loc[3] = 2.82150771
expected.A.loc[13] = 6.12648668
tm.assert_frame_equal(result, expected)

result = df.interpolate(method="slinear")
expected.A.loc[3] = 2.66666667
expected.A.loc[13] = 5.76923077
tm.assert_frame_equal(result, expected)

result = df.interpolate(method="zero")
expected.A.loc[3] = 2.0
expected.A.loc[13] = 5
tm.assert_frame_equal(result, expected, check_dtype=False)

@td.skip_if_no_scipy
def test_interp_alt_scipy(self):
df = DataFrame(
{"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]}
)
result = df.interpolate(method="barycentric")
expected = df.copy()
expected.loc[2, "A"] = 3
expected.loc[5, "A"] = 6
tm.assert_frame_equal(result, expected)

result = df.interpolate(method="barycentric", downcast="infer")
tm.assert_frame_equal(result, expected.astype(np.int64))

result = df.interpolate(method="krogh")
expectedk = df.copy()
expectedk["A"] = expected["A"]
tm.assert_frame_equal(result, expectedk)

result = df.interpolate(method="pchip")
expected.loc[2, "A"] = 3
expected.loc[5, "A"] = 6.0

tm.assert_frame_equal(result, expected)

def test_interp_rowwise(self):
df = DataFrame(
{
0: [1, 2, np.nan, 4],
1: [2, 3, 4, np.nan],
2: [np.nan, 4, 5, 6],
3: [4, np.nan, 6, 7],
4: [1, 2, 3, 4],
}
)
result = df.interpolate(axis=1)
expected = df.copy()
expected.loc[3, 1] = 5
expected.loc[0, 2] = 3
expected.loc[1, 3] = 3
expected[4] = expected[4].astype(np.float64)
tm.assert_frame_equal(result, expected)

result = df.interpolate(axis=1, method="values")
tm.assert_frame_equal(result, expected)

result = df.interpolate(axis=0)
expected = df.interpolate()
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"axis_name, axis_number",
[
pytest.param("rows", 0, id="rows_0"),
pytest.param("index", 0, id="index_0"),
pytest.param("columns", 1, id="columns_1"),
],
)
def test_interp_axis_names(self, axis_name, axis_number):
# GH 29132: test axis names
data = {0: [0, np.nan, 6], 1: [1, np.nan, 7], 2: [2, 5, 8]}

df = DataFrame(data, dtype=np.float64)
result = df.interpolate(axis=axis_name, method="linear")
expected = df.interpolate(axis=axis_number, method="linear")
tm.assert_frame_equal(result, expected)

def test_rowwise_alt(self):
df = DataFrame(
{
0: [0, 0.5, 1.0, np.nan, 4, 8, np.nan, np.nan, 64],
1: [1, 2, 3, 4, 3, 2, 1, 0, -1],
}
)
df.interpolate(axis=0)
# TODO: assert something?

@pytest.mark.parametrize(
"check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)]
)
def test_interp_leading_nans(self, check_scipy):
df = DataFrame(
{"A": [np.nan, np.nan, 0.5, 0.25, 0], "B": [np.nan, -3, -3.5, np.nan, -4]}
)
result = df.interpolate()
expected = df.copy()
expected["B"].loc[3] = -3.75
tm.assert_frame_equal(result, expected)

if check_scipy:
result = df.interpolate(method="polynomial", order=1)
tm.assert_frame_equal(result, expected)

def test_interp_raise_on_only_mixed(self):
df = DataFrame(
{
"A": [1, 2, np.nan, 4],
"B": ["a", "b", "c", "d"],
"C": [np.nan, 2, 5, 7],
"D": [np.nan, np.nan, 9, 9],
"E": [1, 2, 3, 4],
}
)
with pytest.raises(TypeError):
df.interpolate(axis=1)

def test_interp_raise_on_all_object_dtype(self):
# GH 22985
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, dtype="object")
msg = (
"Cannot interpolate with all object-dtype columns "
"in the DataFrame. Try setting at least one "
"column to a numeric dtype."
)
with pytest.raises(TypeError, match=msg):
df.interpolate()

def test_interp_inplace(self):
df = DataFrame({"a": [1.0, 2.0, np.nan, 4.0]})
expected = DataFrame({"a": [1.0, 2.0, 3.0, 4.0]})
result = df.copy()
result["a"].interpolate(inplace=True)
tm.assert_frame_equal(result, expected)

result = df.copy()
result["a"].interpolate(inplace=True, downcast="infer")
tm.assert_frame_equal(result, expected.astype("int64"))

def test_interp_inplace_row(self):
# GH 10395
result = DataFrame(
{"a": [1.0, 2.0, 3.0, 4.0], "b": [np.nan, 2.0, 3.0, 4.0], "c": [3, 2, 2, 2]}
)
expected = result.interpolate(method="linear", axis=1, inplace=False)
result.interpolate(method="linear", axis=1, inplace=True)
tm.assert_frame_equal(result, expected)

def test_interp_ignore_all_good(self):
# GH
df = DataFrame(
{
"A": [1, 2, np.nan, 4],
"B": [1, 2, 3, 4],
"C": [1.0, 2.0, np.nan, 4.0],
"D": [1.0, 2.0, 3.0, 4.0],
}
)
expected = DataFrame(
{
"A": np.array([1, 2, 3, 4], dtype="float64"),
"B": np.array([1, 2, 3, 4], dtype="int64"),
"C": np.array([1.0, 2.0, 3, 4.0], dtype="float64"),
"D": np.array([1.0, 2.0, 3.0, 4.0], dtype="float64"),
}
)

result = df.interpolate(downcast=None)
tm.assert_frame_equal(result, expected)

# all good
result = df[["B", "D"]].interpolate(downcast=None)
tm.assert_frame_equal(result, df[["B", "D"]])

@pytest.mark.parametrize("axis", [0, 1])
def test_interp_time_inplace_axis(self, axis):
# GH 9687
periods = 5
idx = date_range(start="2014-01-01", periods=periods)
data = np.random.rand(periods, periods)
data[data < 0.5] = np.nan
expected = DataFrame(index=idx, columns=idx, data=data)

result = expected.interpolate(axis=0, method="time")
expected.interpolate(axis=0, method="time", inplace=True)
tm.assert_frame_equal(result, expected)
Loading