diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 06a06484b921a..6d7705fb0f838 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -154,9 +154,10 @@ jobs: PANDAS_DATA_MANAGER: array run: | source activate pandas-dev + pytest pandas/tests/frame/methods pytest pandas/tests/frame/test_constructors.py - pytest pandas/tests/frame/constructors/ + pytest pandas/tests/frame/test_* pytest pandas/tests/frame/test_reductions.py pytest pandas/tests/reductions/ pytest pandas/tests/generic/test_generic.py diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 2f2de9764219b..76cfd77d254f2 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -296,6 +296,7 @@ def test_attrs(self): result = df.rename(columns=str) assert result.attrs == {"version": 1} + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) setitem (no copy) @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None]) def test_set_flags(self, allows_duplicate_labels, frame_or_series): obj = DataFrame({"A": [1, 2]}) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 44b6d44ee6275..c6816fa6481f4 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -7,6 +7,8 @@ import pytest import pytz +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( DataFrame, @@ -686,6 +688,7 @@ def test_df_add_2d_array_collike_broadcasts(self): result = collike + df tm.assert_frame_equal(result, expected) + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) decide on dtypes def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators): # GH#23000 opname = all_arithmetic_operators @@ -707,6 +710,7 @@ def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators): result = getattr(df, opname)(rowlike) tm.assert_frame_equal(result, expected) + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) decide on dtypes def test_df_arith_2d_array_collike_broadcasts(self, all_arithmetic_operators): # GH#23000 opname = all_arithmetic_operators @@ -1351,7 +1355,7 @@ def test_strings_to_numbers_comparisons_raises(self, compare_operators_no_eq_ne) def test_comparison_protected_from_errstate(self): missing_df = tm.makeDataFrame() - missing_df.iloc[0]["A"] = np.nan + missing_df.loc[missing_df.index[0], "A"] = np.nan with np.errstate(invalid="ignore"): expected = missing_df.values < 0 with np.errstate(invalid="raise"): diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 193f1617fbb55..9c9557a442a80 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -9,6 +9,7 @@ import pytest from pandas.errors import PerformanceWarning +import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -30,6 +31,11 @@ # structure +# TODO(ArrayManager) check which of those tests need to be rewritten to test the +# equivalent for ArrayManager +pytestmark = td.skip_array_manager_invalid_test + + class TestDataFrameBlockInternals: def test_setitem_invalidates_datetime_index_freq(self): # GH#24096 altering a datetime64tz column inplace invalidates the diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 6a6e2a5aa2636..c9a39eb460cf4 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -291,7 +291,7 @@ def test_multi_dtype2(self): expected = DataFrame([[1, 2, "foo", "bar"]], columns=["a", "a.1", "a.2", "a.3"]) tm.assert_frame_equal(df, expected) - def test_dups_across_blocks(self): + def test_dups_across_blocks(self, using_array_manager): # dups across blocks df_float = DataFrame(np.random.randn(10, 3), dtype="float64") df_int = DataFrame(np.random.randn(10, 3), dtype="int64") @@ -302,8 +302,9 @@ def test_dups_across_blocks(self): ) df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1) - assert len(df._mgr.blknos) == len(df.columns) - assert len(df._mgr.blklocs) == len(df.columns) + if not using_array_manager: + assert len(df._mgr.blknos) == len(df.columns) + assert len(df._mgr.blklocs) == len(df.columns) # testing iloc for i in range(len(df.columns)): diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index c8131049b51d2..03c5b6e027dac 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -26,14 +26,16 @@ class TestDataFrameReprInfoEtc: - def test_repr_bytes_61_lines(self): + def test_repr_bytes_61_lines(self, using_array_manager): # GH#12857 lets = list("ACDEFGHIJKLMNOP") slen = 50 nseqs = 1000 words = [[np.random.choice(lets) for x in range(slen)] for _ in range(nseqs)] df = DataFrame(words).astype("U1") - assert (df.dtypes == object).all() + # TODO(Arraymanager) astype("U1") actually gives this dtype instead of object + if not using_array_manager: + assert (df.dtypes == object).all() # smoke tests; at one point this raised with 61 but not 60 repr(df) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 9945b739f8a87..fd23ea3a7621c 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -5,6 +5,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( DataFrame, @@ -60,12 +62,13 @@ def test_stack_mixed_level(self): expected = expected[["a", "b"]] tm.assert_frame_equal(result, expected) - def test_unstack_not_consolidated(self): + def test_unstack_not_consolidated(self, using_array_manager): # Gh#34708 df = DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]}) df2 = df[["x"]] df2["y"] = df["y"] - assert len(df2._mgr.blocks) == 2 + if not using_array_manager: + assert len(df2._mgr.blocks) == 2 res = df2.unstack() expected = df.unstack() @@ -747,7 +750,8 @@ def test_unstack_multi_level_rows_and_cols(self): expected = df.unstack(["i3"]).unstack(["i2"]) tm.assert_frame_equal(result, expected) - def test_unstack_nan_index(self): # GH7466 + def test_unstack_nan_index1(self): + # GH7466 def cast(val): val_str = "" if val != val else val return f"{val_str:1}" @@ -833,6 +837,7 @@ def verify(df): for col in ["4th", "5th"]: verify(udf[col]) + def test_unstack_nan_index2(self): # GH7403 df = DataFrame({"A": list("aaaabbbb"), "B": range(8), "C": range(8)}) df.iloc[3, 1] = np.NaN @@ -875,6 +880,7 @@ def verify(df): right = DataFrame(vals, columns=cols, index=idx) tm.assert_frame_equal(left, right) + def test_unstack_nan_index3(self, using_array_manager): # GH7401 df = DataFrame( { @@ -896,8 +902,13 @@ def verify(df): ) right = DataFrame(vals, columns=cols, index=idx) + if using_array_manager: + # INFO(ArrayManager) with ArrayManager preserve dtype where possible + cols = right.columns[[1, 2, 3, 5]] + right[cols] = right[cols].astype("int64") tm.assert_frame_equal(left, right) + def test_unstack_nan_index4(self): # GH4862 vals = [ ["Hg", np.nan, np.nan, 680585148], @@ -938,6 +949,8 @@ def verify(df): left = df.loc[17264:].copy().set_index(["s_id", "dosage", "agent"]) tm.assert_frame_equal(left.unstack(), right) + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) MultiIndex bug + def test_unstack_nan_index5(self): # GH9497 - multiple unstack with nulls df = DataFrame( { @@ -1887,7 +1900,7 @@ def test_unstack_group_index_overflow(self): result = s.unstack(4) assert result.shape == (500, 2) - def test_unstack_with_missing_int_cast_to_float(self): + def test_unstack_with_missing_int_cast_to_float(self, using_array_manager): # https://github.com/pandas-dev/pandas/issues/37115 df = DataFrame( { @@ -1899,7 +1912,8 @@ def test_unstack_with_missing_int_cast_to_float(self): # add another int column to get 2 blocks df["is_"] = 1 - assert len(df._mgr.blocks) == 2 + if not using_array_manager: + assert len(df._mgr.blocks) == 2 result = df.unstack("b") result[("is_", "ca")] = result[("is_", "ca")].fillna(0) @@ -1912,6 +1926,10 @@ def test_unstack_with_missing_int_cast_to_float(self): names=[None, "b"], ), ) + if using_array_manager: + # INFO(ArrayManager) with ArrayManager preserve dtype where possible + expected[("v", "cb")] = expected[("v", "cb")].astype("int64") + expected[("is_", "cb")] = expected[("is_", "cb")].astype("int64") tm.assert_frame_equal(result, expected) def test_unstack_with_level_has_nan(self):