From c8697345188c7274c1e86436bd18afdc05f98407 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 21 Mar 2020 11:28:25 -0700 Subject: [PATCH 1/3] implement test_insert --- .../tests/indexes/timedeltas/test_indexing.py | 91 ---------------- .../tests/indexes/timedeltas/test_insert.py | 101 ++++++++++++++++++ 2 files changed, 101 insertions(+), 91 deletions(-) create mode 100644 pandas/tests/indexes/timedeltas/test_insert.py diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index 5dec799832291..5b77e879c71da 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -189,97 +189,6 @@ def test_take_fill_value(self): class TestTimedeltaIndex: - def test_insert_empty(self): - # Corner case inserting with length zero doesnt raise IndexError - idx = timedelta_range("1 Day", periods=3) - td = idx[0] - - idx[:0].insert(0, td) - idx[:0].insert(1, td) - idx[:0].insert(-1, td) - - def test_insert(self): - - idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx") - - result = idx.insert(2, timedelta(days=5)) - exp = TimedeltaIndex(["4day", "1day", "5day", "2day"], name="idx") - tm.assert_index_equal(result, exp) - - # insertion of non-datetime should coerce to object index - result = idx.insert(1, "inserted") - expected = Index( - [Timedelta("4day"), "inserted", Timedelta("1day"), Timedelta("2day")], - name="idx", - ) - assert not isinstance(result, TimedeltaIndex) - tm.assert_index_equal(result, expected) - assert result.name == expected.name - - idx = timedelta_range("1day 00:00:01", periods=3, freq="s", name="idx") - - # preserve freq - expected_0 = TimedeltaIndex( - ["1day", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"], - name="idx", - freq="s", - ) - expected_3 = TimedeltaIndex( - ["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:04"], - name="idx", - freq="s", - ) - - # reset freq to None - expected_1_nofreq = TimedeltaIndex( - ["1day 00:00:01", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"], - name="idx", - freq=None, - ) - expected_3_nofreq = TimedeltaIndex( - ["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:05"], - name="idx", - freq=None, - ) - - cases = [ - (0, Timedelta("1day"), expected_0), - (-3, Timedelta("1day"), expected_0), - (3, Timedelta("1day 00:00:04"), expected_3), - (1, Timedelta("1day 00:00:01"), expected_1_nofreq), - (3, Timedelta("1day 00:00:05"), expected_3_nofreq), - ] - - for n, d, expected in cases: - result = idx.insert(n, d) - tm.assert_index_equal(result, expected) - assert result.name == expected.name - assert result.freq == expected.freq - - @pytest.mark.parametrize( - "null", [None, np.nan, np.timedelta64("NaT"), pd.NaT, pd.NA] - ) - def test_insert_nat(self, null): - # GH 18295 (test missing) - idx = timedelta_range("1day", "3day") - result = idx.insert(1, null) - expected = TimedeltaIndex(["1day", pd.NaT, "2day", "3day"]) - tm.assert_index_equal(result, expected) - - def test_insert_invalid_na(self): - idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx") - with pytest.raises(TypeError, match="incompatible label"): - idx.insert(0, np.datetime64("NaT")) - - def test_insert_dont_cast_strings(self): - # To match DatetimeIndex and PeriodIndex behavior, dont try to - # parse strings to Timedelta - idx = timedelta_range("1day", "3day") - - result = idx.insert(0, "1 Day") - assert result.dtype == object - assert result[0] == "1 Day" - def test_delete(self): idx = timedelta_range(start="1 Days", periods=5, freq="D", name="idx") diff --git a/pandas/tests/indexes/timedeltas/test_insert.py b/pandas/tests/indexes/timedeltas/test_insert.py new file mode 100644 index 0000000000000..b214e009db869 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_insert.py @@ -0,0 +1,101 @@ +from datetime import timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import Index, Timedelta, TimedeltaIndex, timedelta_range +import pandas._testing as tm + + +class TestTimedeltaIndexInsert: + def test_insert(self): + + idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx") + + result = idx.insert(2, timedelta(days=5)) + exp = TimedeltaIndex(["4day", "1day", "5day", "2day"], name="idx") + tm.assert_index_equal(result, exp) + + # insertion of non-datetime should coerce to object index + result = idx.insert(1, "inserted") + expected = Index( + [Timedelta("4day"), "inserted", Timedelta("1day"), Timedelta("2day")], + name="idx", + ) + assert not isinstance(result, TimedeltaIndex) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + idx = timedelta_range("1day 00:00:01", periods=3, freq="s", name="idx") + + # preserve freq + expected_0 = TimedeltaIndex( + ["1day", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"], + name="idx", + freq="s", + ) + expected_3 = TimedeltaIndex( + ["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:04"], + name="idx", + freq="s", + ) + + # reset freq to None + expected_1_nofreq = TimedeltaIndex( + ["1day 00:00:01", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"], + name="idx", + freq=None, + ) + expected_3_nofreq = TimedeltaIndex( + ["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:05"], + name="idx", + freq=None, + ) + + cases = [ + (0, Timedelta("1day"), expected_0), + (-3, Timedelta("1day"), expected_0), + (3, Timedelta("1day 00:00:04"), expected_3), + (1, Timedelta("1day 00:00:01"), expected_1_nofreq), + (3, Timedelta("1day 00:00:05"), expected_3_nofreq), + ] + + for n, d, expected in cases: + result = idx.insert(n, d) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + @pytest.mark.parametrize( + "null", [None, np.nan, np.timedelta64("NaT"), pd.NaT, pd.NA] + ) + def test_insert_nat(self, null): + # GH 18295 (test missing) + idx = timedelta_range("1day", "3day") + result = idx.insert(1, null) + expected = TimedeltaIndex(["1day", pd.NaT, "2day", "3day"]) + tm.assert_index_equal(result, expected) + + def test_insert_invalid_na(self): + idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx") + with pytest.raises(TypeError, match="incompatible label"): + idx.insert(0, np.datetime64("NaT")) + + def test_insert_dont_cast_strings(self): + # To match DatetimeIndex and PeriodIndex behavior, dont try to + # parse strings to Timedelta + idx = timedelta_range("1day", "3day") + + result = idx.insert(0, "1 Day") + assert result.dtype == object + assert result[0] == "1 Day" + + def test_insert_empty(self): + # Corner case inserting with length zero doesnt raise IndexError + idx = timedelta_range("1 Day", periods=3) + td = idx[0] + + idx[:0].insert(0, td) + idx[:0].insert(1, td) + idx[:0].insert(-1, td) From 6a37cb11a21a25d91e0a2abd135d2fab3f8629c7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 21 Mar 2020 11:40:37 -0700 Subject: [PATCH 2/3] implement test_insert --- pandas/tests/frame/indexing/test_insert.py | 57 ++++++++++++++++++++++ pandas/tests/frame/test_mutate_columns.py | 41 ++-------------- 2 files changed, 60 insertions(+), 38 deletions(-) create mode 100644 pandas/tests/frame/indexing/test_insert.py diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py new file mode 100644 index 0000000000000..e4dfc5ed9d518 --- /dev/null +++ b/pandas/tests/frame/indexing/test_insert.py @@ -0,0 +1,57 @@ +""" +test_insert is specifically for the DataFrame.insert method; not to be +confused with tests with "insert" in their names that are really testing +__setitem__. +""" +import numpy as np +import pytest + +from pandas import DataFrame, Index +import pandas._testing as tm + + +class TestDataFrameInsert: + def test_insert(self): + df = DataFrame( + np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"] + ) + + df.insert(0, "foo", df["a"]) + tm.assert_index_equal(df.columns, Index(["foo", "c", "b", "a"])) + tm.assert_series_equal(df["a"], df["foo"], check_names=False) + + df.insert(2, "bar", df["c"]) + tm.assert_index_equal(df.columns, Index(["foo", "c", "bar", "b", "a"])) + tm.assert_almost_equal(df["c"], df["bar"], check_names=False) + + with pytest.raises(ValueError, match="already exists"): + df.insert(1, "a", df["b"]) + + msg = "cannot insert c, already exists" + with pytest.raises(ValueError, match=msg): + df.insert(1, "c", df["b"]) + + df.columns.name = "some_name" + # preserve columns name field + df.insert(0, "baz", df["c"]) + assert df.columns.name == "some_name" + + def test_insert_column_bug_4032(self): + + # GH4032, inserting a column and renaming causing errors + df = DataFrame({"b": [1.1, 2.2]}) + + df = df.rename(columns={}) + df.insert(0, "a", [1, 2]) + result = df.rename(columns={}) + + str(result) + expected = DataFrame([[1, 1.1], [2, 2.2]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + df.insert(0, "c", [1.3, 2.3]) + result = df.rename(columns={}) + + str(result) + expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 33f71602f4713..382d44449d64a 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas import DataFrame, Index, MultiIndex, Series +from pandas import DataFrame, MultiIndex, Series import pandas._testing as tm # Column add, remove, delete. @@ -41,18 +41,12 @@ def test_insert_benchmark(self): expected = DataFrame(np.repeat(new_col, K).reshape(N, K), index=range(N)) tm.assert_frame_equal(df, expected) - def test_insert(self): + def test_setitem_different_dtype(self): df = DataFrame( np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"] ) - df.insert(0, "foo", df["a"]) - tm.assert_index_equal(df.columns, Index(["foo", "c", "b", "a"])) - tm.assert_series_equal(df["a"], df["foo"], check_names=False) - df.insert(2, "bar", df["c"]) - tm.assert_index_equal(df.columns, Index(["foo", "c", "bar", "b", "a"])) - tm.assert_almost_equal(df["c"], df["bar"], check_names=False) # diff dtype @@ -82,17 +76,7 @@ def test_insert(self): ) tm.assert_series_equal(result, expected) - with pytest.raises(ValueError, match="already exists"): - df.insert(1, "a", df["b"]) - msg = "cannot insert c, already exists" - with pytest.raises(ValueError, match=msg): - df.insert(1, "c", df["b"]) - - df.columns.name = "some_name" - # preserve columns name field - df.insert(0, "baz", df["c"]) - assert df.columns.name == "some_name" - + def test_setitem_empty_columns(self): # GH 13522 df = DataFrame(index=["A", "B", "C"]) df["X"] = df.index @@ -165,22 +149,3 @@ def test_pop_non_unique_cols(self): assert "b" in df.columns assert "a" not in df.columns assert len(df.index) == 2 - - def test_insert_column_bug_4032(self): - - # GH4032, inserting a column and renaming causing errors - df = DataFrame({"b": [1.1, 2.2]}) - df = df.rename(columns={}) - df.insert(0, "a", [1, 2]) - - result = df.rename(columns={}) - str(result) - expected = DataFrame([[1, 1.1], [2, 2.2]], columns=["a", "b"]) - tm.assert_frame_equal(result, expected) - df.insert(0, "c", [1.3, 2.3]) - - result = df.rename(columns={}) - str(result) - - expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"]) - tm.assert_frame_equal(result, expected) From 0a5e9f9a75b70d269c76b0fd296ee4899fb7501a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 22 Mar 2020 12:03:52 -0700 Subject: [PATCH 3/3] REF: insert tests --- pandas/tests/frame/indexing/test_insert.py | 13 ++++++++++++- pandas/tests/frame/test_mutate_columns.py | 4 ++-- pandas/tests/frame/test_nonunique_indexes.py | 11 ----------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py index e4dfc5ed9d518..622c93d1c2fdc 100644 --- a/pandas/tests/frame/indexing/test_insert.py +++ b/pandas/tests/frame/indexing/test_insert.py @@ -38,7 +38,7 @@ def test_insert(self): def test_insert_column_bug_4032(self): - # GH4032, inserting a column and renaming causing errors + # GH#4032, inserting a column and renaming causing errors df = DataFrame({"b": [1.1, 2.2]}) df = df.rename(columns={}) @@ -55,3 +55,14 @@ def test_insert_column_bug_4032(self): str(result) expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"]) tm.assert_frame_equal(result, expected) + + def test_insert_with_columns_dups(self): + # GH#14291 + df = DataFrame() + df.insert(0, "A", ["g", "h", "i"], allow_duplicates=True) + df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True) + df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True) + exp = DataFrame( + [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"] + ) + tm.assert_frame_equal(df, exp) diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 382d44449d64a..9d1b6abff6241 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -10,7 +10,7 @@ class TestDataFrameMutateColumns: - def test_insert_error_msmgs(self): + def test_setitem_error_msmgs(self): # GH 7432 df = DataFrame( @@ -30,7 +30,7 @@ def test_insert_error_msmgs(self): with pytest.raises(TypeError, match=msg): df["gr"] = df.groupby(["b", "c"]).count() - def test_insert_benchmark(self): + def test_setitem_benchmark(self): # from the vb_suite/frame_methods/frame_insert_columns N = 10 K = 5 diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 233c0f4bd3544..2530886802921 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -513,14 +513,3 @@ def test_set_value_by_index(self): df.iloc[:, 0] = 3 tm.assert_series_equal(df.iloc[:, 1], expected) - - def test_insert_with_columns_dups(self): - # GH 14291 - df = pd.DataFrame() - df.insert(0, "A", ["g", "h", "i"], allow_duplicates=True) - df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True) - df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True) - exp = pd.DataFrame( - [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"] - ) - tm.assert_frame_equal(df, exp)