Skip to content

TST: collect .insert tests #32909

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions pandas/tests/frame/indexing/test_insert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""
test_insert is specifically for the DataFrame.insert method; not to be
confused with tests with "insert" in their names that are really testing
__setitem__.
"""
import numpy as np
import pytest

from pandas import DataFrame, Index
import pandas._testing as tm


class TestDataFrameInsert:
    """Tests that exercise ``DataFrame.insert`` directly."""

    def test_insert(self):
        """Check positional insertion, duplicate rejection and name retention."""
        frame = DataFrame(
            np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"]
        )

        # Insert a copy of an existing column at the very front.
        frame.insert(0, "foo", frame["a"])
        front_columns = Index(["foo", "c", "b", "a"])
        tm.assert_index_equal(frame.columns, front_columns)
        tm.assert_series_equal(frame["a"], frame["foo"], check_names=False)

        # Insert a copy of an existing column in the middle.
        frame.insert(2, "bar", frame["c"])
        middle_columns = Index(["foo", "c", "bar", "b", "a"])
        tm.assert_index_equal(frame.columns, middle_columns)
        tm.assert_almost_equal(frame["c"], frame["bar"], check_names=False)

        # Re-using an existing label must be rejected; the second pattern
        # additionally pins the full error message.
        duplicate_cases = [
            ("a", "already exists"),
            ("c", "cannot insert c, already exists"),
        ]
        for label, pattern in duplicate_cases:
            with pytest.raises(ValueError, match=pattern):
                frame.insert(1, label, frame["b"])

        # The name attached to the columns Index must survive an insert.
        frame.columns.name = "some_name"
        frame.insert(0, "baz", frame["c"])
        assert frame.columns.name == "some_name"

    def test_insert_column_bug_4032(self):
        """Interleave ``insert`` with no-op ``rename`` calls (GH#4032)."""
        # GH#4032, inserting a column and renaming causing errors
        frame = DataFrame({"b": [1.1, 2.2]}).rename(columns={})

        steps = [
            ("a", [1, 2], DataFrame([[1, 1.1], [2, 2.2]], columns=["a", "b"])),
            (
                "c",
                [1.3, 2.3],
                DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"]),
            ),
        ]
        for label, values, expected in steps:
            frame.insert(0, label, values)
            renamed = frame.rename(columns={})

            # Building the string form is part of the check: it must not raise.
            str(renamed)
            tm.assert_frame_equal(renamed, expected)

    def test_insert_with_columns_dups(self):
        """Insert the same label repeatedly with ``allow_duplicates=True``."""
        # GH#14291
        frame = DataFrame()
        # Each insert goes to position 0, so the last one ends up leftmost.
        for values in (["g", "h", "i"], ["d", "e", "f"], ["a", "b", "c"]):
            frame.insert(0, "A", values, allow_duplicates=True)

        expected = DataFrame(
            [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]],
            columns=["A"] * 3,
        )
        tm.assert_frame_equal(frame, expected)
45 changes: 5 additions & 40 deletions pandas/tests/frame/test_mutate_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
import numpy as np
import pytest

from pandas import DataFrame, Index, MultiIndex, Series
from pandas import DataFrame, MultiIndex, Series
import pandas._testing as tm

# Column add, remove, delete.


class TestDataFrameMutateColumns:
def test_insert_error_msmgs(self):
def test_setitem_error_msmgs(self):

# GH 7432
df = DataFrame(
Expand All @@ -30,7 +30,7 @@ def test_insert_error_msmgs(self):
with pytest.raises(TypeError, match=msg):
df["gr"] = df.groupby(["b", "c"]).count()

def test_insert_benchmark(self):
def test_setitem_benchmark(self):
# from the vb_suite/frame_methods/frame_insert_columns
N = 10
K = 5
Expand All @@ -41,18 +41,12 @@ def test_insert_benchmark(self):
expected = DataFrame(np.repeat(new_col, K).reshape(N, K), index=range(N))
tm.assert_frame_equal(df, expected)

def test_insert(self):
def test_setitem_different_dtype(self):
df = DataFrame(
np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"]
)

df.insert(0, "foo", df["a"])
tm.assert_index_equal(df.columns, Index(["foo", "c", "b", "a"]))
tm.assert_series_equal(df["a"], df["foo"], check_names=False)

df.insert(2, "bar", df["c"])
tm.assert_index_equal(df.columns, Index(["foo", "c", "bar", "b", "a"]))
tm.assert_almost_equal(df["c"], df["bar"], check_names=False)

# diff dtype

Expand Down Expand Up @@ -82,17 +76,7 @@ def test_insert(self):
)
tm.assert_series_equal(result, expected)

with pytest.raises(ValueError, match="already exists"):
df.insert(1, "a", df["b"])
msg = "cannot insert c, already exists"
with pytest.raises(ValueError, match=msg):
df.insert(1, "c", df["b"])

df.columns.name = "some_name"
# preserve columns name field
df.insert(0, "baz", df["c"])
assert df.columns.name == "some_name"

def test_setitem_empty_columns(self):
# GH 13522
df = DataFrame(index=["A", "B", "C"])
df["X"] = df.index
Expand Down Expand Up @@ -165,22 +149,3 @@ def test_pop_non_unique_cols(self):
assert "b" in df.columns
assert "a" not in df.columns
assert len(df.index) == 2

def test_insert_column_bug_4032(self):

# GH4032, inserting a column and renaming causing errors
df = DataFrame({"b": [1.1, 2.2]})
df = df.rename(columns={})
df.insert(0, "a", [1, 2])

result = df.rename(columns={})
str(result)
expected = DataFrame([[1, 1.1], [2, 2.2]], columns=["a", "b"])
tm.assert_frame_equal(result, expected)
df.insert(0, "c", [1.3, 2.3])

result = df.rename(columns={})
str(result)

expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"])
tm.assert_frame_equal(result, expected)
11 changes: 0 additions & 11 deletions pandas/tests/frame/test_nonunique_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,14 +513,3 @@ def test_set_value_by_index(self):

df.iloc[:, 0] = 3
tm.assert_series_equal(df.iloc[:, 1], expected)

def test_insert_with_columns_dups(self):
# GH 14291
df = pd.DataFrame()
df.insert(0, "A", ["g", "h", "i"], allow_duplicates=True)
df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True)
df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True)
exp = pd.DataFrame(
[["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"]
)
tm.assert_frame_equal(df, exp)
91 changes: 0 additions & 91 deletions pandas/tests/indexes/timedeltas/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,97 +189,6 @@ def test_take_fill_value(self):


class TestTimedeltaIndex:
def test_insert_empty(self):
# Corner case inserting with length zero doesnt raise IndexError
idx = timedelta_range("1 Day", periods=3)
td = idx[0]

idx[:0].insert(0, td)
idx[:0].insert(1, td)
idx[:0].insert(-1, td)

def test_insert(self):

idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx")

result = idx.insert(2, timedelta(days=5))
exp = TimedeltaIndex(["4day", "1day", "5day", "2day"], name="idx")
tm.assert_index_equal(result, exp)

# insertion of non-datetime should coerce to object index
result = idx.insert(1, "inserted")
expected = Index(
[Timedelta("4day"), "inserted", Timedelta("1day"), Timedelta("2day")],
name="idx",
)
assert not isinstance(result, TimedeltaIndex)
tm.assert_index_equal(result, expected)
assert result.name == expected.name

idx = timedelta_range("1day 00:00:01", periods=3, freq="s", name="idx")

# preserve freq
expected_0 = TimedeltaIndex(
["1day", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"],
name="idx",
freq="s",
)
expected_3 = TimedeltaIndex(
["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:04"],
name="idx",
freq="s",
)

# reset freq to None
expected_1_nofreq = TimedeltaIndex(
["1day 00:00:01", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"],
name="idx",
freq=None,
)
expected_3_nofreq = TimedeltaIndex(
["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:05"],
name="idx",
freq=None,
)

cases = [
(0, Timedelta("1day"), expected_0),
(-3, Timedelta("1day"), expected_0),
(3, Timedelta("1day 00:00:04"), expected_3),
(1, Timedelta("1day 00:00:01"), expected_1_nofreq),
(3, Timedelta("1day 00:00:05"), expected_3_nofreq),
]

for n, d, expected in cases:
result = idx.insert(n, d)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq

@pytest.mark.parametrize(
"null", [None, np.nan, np.timedelta64("NaT"), pd.NaT, pd.NA]
)
def test_insert_nat(self, null):
# GH 18295 (test missing)
idx = timedelta_range("1day", "3day")
result = idx.insert(1, null)
expected = TimedeltaIndex(["1day", pd.NaT, "2day", "3day"])
tm.assert_index_equal(result, expected)

def test_insert_invalid_na(self):
idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx")
with pytest.raises(TypeError, match="incompatible label"):
idx.insert(0, np.datetime64("NaT"))

def test_insert_dont_cast_strings(self):
# To match DatetimeIndex and PeriodIndex behavior, dont try to
# parse strings to Timedelta
idx = timedelta_range("1day", "3day")

result = idx.insert(0, "1 Day")
assert result.dtype == object
assert result[0] == "1 Day"

def test_delete(self):
idx = timedelta_range(start="1 Days", periods=5, freq="D", name="idx")

Expand Down
101 changes: 101 additions & 0 deletions pandas/tests/indexes/timedeltas/test_insert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from datetime import timedelta

import numpy as np
import pytest

import pandas as pd
from pandas import Index, Timedelta, TimedeltaIndex, timedelta_range
import pandas._testing as tm


class TestTimedeltaIndexInsert:
def test_insert(self):

idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx")

result = idx.insert(2, timedelta(days=5))
exp = TimedeltaIndex(["4day", "1day", "5day", "2day"], name="idx")
tm.assert_index_equal(result, exp)

# insertion of non-datetime should coerce to object index
result = idx.insert(1, "inserted")
expected = Index(
[Timedelta("4day"), "inserted", Timedelta("1day"), Timedelta("2day")],
name="idx",
)
assert not isinstance(result, TimedeltaIndex)
tm.assert_index_equal(result, expected)
assert result.name == expected.name

idx = timedelta_range("1day 00:00:01", periods=3, freq="s", name="idx")

# preserve freq
expected_0 = TimedeltaIndex(
["1day", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"],
name="idx",
freq="s",
)
expected_3 = TimedeltaIndex(
["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:04"],
name="idx",
freq="s",
)

# reset freq to None
expected_1_nofreq = TimedeltaIndex(
["1day 00:00:01", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"],
name="idx",
freq=None,
)
expected_3_nofreq = TimedeltaIndex(
["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:05"],
name="idx",
freq=None,
)

cases = [
(0, Timedelta("1day"), expected_0),
(-3, Timedelta("1day"), expected_0),
(3, Timedelta("1day 00:00:04"), expected_3),
(1, Timedelta("1day 00:00:01"), expected_1_nofreq),
(3, Timedelta("1day 00:00:05"), expected_3_nofreq),
]

for n, d, expected in cases:
result = idx.insert(n, d)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq

@pytest.mark.parametrize(
"null", [None, np.nan, np.timedelta64("NaT"), pd.NaT, pd.NA]
)
def test_insert_nat(self, null):
# GH 18295 (test missing)
idx = timedelta_range("1day", "3day")
result = idx.insert(1, null)
expected = TimedeltaIndex(["1day", pd.NaT, "2day", "3day"])
tm.assert_index_equal(result, expected)

def test_insert_invalid_na(self):
idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx")
with pytest.raises(TypeError, match="incompatible label"):
idx.insert(0, np.datetime64("NaT"))

def test_insert_dont_cast_strings(self):
# To match DatetimeIndex and PeriodIndex behavior, dont try to
# parse strings to Timedelta
idx = timedelta_range("1day", "3day")

result = idx.insert(0, "1 Day")
assert result.dtype == object
assert result[0] == "1 Day"

def test_insert_empty(self):
# Corner case inserting with length zero doesnt raise IndexError
idx = timedelta_range("1 Day", periods=3)
td = idx[0]

idx[:0].insert(0, td)
idx[:0].insert(1, td)
idx[:0].insert(-1, td)