From 6832755037039485655f423a3d4d78a95bf528cc Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 26 Dec 2023 00:44:22 +0100 Subject: [PATCH] DEPR: Remove array manager branches from tests --- pandas/conftest.py | 8 -- pandas/tests/apply/test_frame_apply.py | 4 +- pandas/tests/arithmetic/test_numeric.py | 8 +- pandas/tests/arithmetic/test_timedelta64.py | 9 +- pandas/tests/copy_view/test_array.py | 12 +-- pandas/tests/copy_view/test_constructors.py | 9 +- pandas/tests/copy_view/test_indexing.py | 90 +++++-------------- pandas/tests/copy_view/test_methods.py | 28 +++--- pandas/tests/copy_view/test_replace.py | 4 +- pandas/tests/extension/base/setitem.py | 6 +- pandas/tests/frame/indexing/test_indexing.py | 14 +-- pandas/tests/frame/indexing/test_insert.py | 9 +- pandas/tests/frame/indexing/test_setitem.py | 17 ++-- pandas/tests/frame/indexing/test_xs.py | 18 +--- pandas/tests/frame/methods/test_equals.py | 4 +- .../tests/frame/methods/test_interpolate.py | 14 +-- pandas/tests/frame/methods/test_quantile.py | 65 +++----------- pandas/tests/frame/methods/test_shift.py | 8 +- .../tests/frame/methods/test_sort_values.py | 5 +- pandas/tests/frame/test_arithmetic.py | 14 +-- pandas/tests/frame/test_constructors.py | 65 ++++---------- pandas/tests/frame/test_nonunique_indexes.py | 7 +- pandas/tests/frame/test_reductions.py | 19 +--- pandas/tests/frame/test_stack_unstack.py | 25 ++---- pandas/tests/groupby/test_groupby.py | 4 +- pandas/tests/groupby/test_reductions.py | 5 +- .../indexing/test_chaining_and_caching.py | 35 ++------ pandas/tests/indexing/test_iloc.py | 23 ++--- pandas/tests/indexing/test_indexing.py | 8 +- pandas/tests/indexing/test_loc.py | 8 +- pandas/tests/indexing/test_partial.py | 4 +- pandas/tests/io/test_parquet.py | 7 +- pandas/tests/reshape/concat/test_append.py | 13 +-- pandas/tests/reshape/concat/test_concat.py | 10 +-- pandas/tests/reshape/concat/test_datetimes.py | 23 ++--- pandas/tests/reshape/merge/test_merge.py | 9 +- pandas/tests/reshape/test_crosstab.py | 5 +- pandas/tests/reshape/test_pivot.py | 10 +-- pandas/tests/reshape/test_pivot_multilevel.py | 6 +- pandas/tests/series/methods/test_reindex.py | 2 +- pandas/tests/series/test_constructors.py | 7 +- pandas/tests/series/test_reductions.py | 17 ++-- 42 files changed, 171 insertions(+), 487 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 983272d79081e..046cda259eefd 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1877,14 +1877,6 @@ def indexer_ial(request): return request.param -@pytest.fixture -def using_array_manager() -> bool: - """ - Fixture to check if the array manager is being used. - """ - return _get_option("mode.data_manager", silent=True) == "array" - - @pytest.fixture def using_copy_on_write() -> bool: """ diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index b7eac6b8f0ea1..0839f005305a5 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1487,7 +1487,7 @@ def test_apply_dtype(col): tm.assert_series_equal(result, expected) -def test_apply_mutating(using_array_manager, using_copy_on_write, warn_copy_on_write): +def test_apply_mutating(using_copy_on_write, warn_copy_on_write): # GH#35462 case where applied func pins a new BlockManager to a row df = DataFrame({"a": range(100), "b": range(100, 200)}) df_orig = df.copy() @@ -1505,7 +1505,7 @@ def func(row): result = df.apply(func, axis=1) tm.assert_frame_equal(result, expected) - if using_copy_on_write or using_array_manager: + if using_copy_on_write: # INFO(CoW) With copy on write, mutating a viewing row doesn't mutate the parent # INFO(ArrayManager) With BlockManager, the row is a view and mutated in place, # with ArrayManager the row is not a view, and thus not mutated in place diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index d8c1786b6b422..ebcd7cbd963d7 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -586,16 +586,12 @@ def test_df_div_zero_series_does_not_commute(self): # ------------------------------------------------------------------ # Mod By Zero - def test_df_mod_zero_df(self, using_array_manager): + def test_df_mod_zero_df(self): # GH#3590, modulo as ints df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) # this is technically wrong, as the integer portion is coerced to float first = Series([0, 0, 0, 0]) - if not using_array_manager: - # INFO(ArrayManager) BlockManager doesn't preserve dtype per column - # while ArrayManager performs op column-wisedoes and thus preserves - # dtype if possible - first = first.astype("float64") + first = first.astype("float64") second = Series([np.nan, np.nan, np.nan, 0]) expected = pd.DataFrame({"first": first, "second": second}) result = df % df diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 007d1e670e1e0..b2007209dd5b9 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1736,9 +1736,7 @@ def test_td64_div_object_mixed_result(self, box_with_array): # ------------------------------------------------------------------ # __floordiv__, __rfloordiv__ - def test_td64arr_floordiv_td64arr_with_nat( - self, box_with_array, using_array_manager - ): + def test_td64arr_floordiv_td64arr_with_nat(self, box_with_array): # GH#35529 box = box_with_array xbox = np.ndarray if box is pd.array else box @@ -1751,11 +1749,6 @@ def test_td64arr_floordiv_td64arr_with_nat( expected = np.array([1.0, 1.0, np.nan], dtype=np.float64) expected = tm.box_expected(expected, xbox) - if box is DataFrame and using_array_manager: - # INFO(ArrayManager) floordiv returns integer, and ArrayManager - # performs ops column-wise and thus preserves int64 dtype for - # columns without missing values - expected[[0, 1]] = expected[[0, 1]].astype("int64") with tm.maybe_produces_warning( RuntimeWarning, box is pd.array, check_stacklevel=False diff --git a/pandas/tests/copy_view/test_array.py b/pandas/tests/copy_view/test_array.py index 9a3f83e0293f5..13f42cce4fe69 100644 --- a/pandas/tests/copy_view/test_array.py +++ b/pandas/tests/copy_view/test_array.py @@ -48,7 +48,7 @@ def test_series_values(using_copy_on_write, method): [lambda df: df.values, lambda df: np.asarray(df)], ids=["values", "asarray"], ) -def test_dataframe_values(using_copy_on_write, using_array_manager, method): +def test_dataframe_values(using_copy_on_write, method): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) df_orig = df.copy() @@ -70,10 +70,7 @@ def test_dataframe_values(using_copy_on_write, using_array_manager, method): else: assert arr.flags.writeable is True arr[0, 0] = 0 - if not using_array_manager: - assert df.iloc[0, 0] == 0 - else: - tm.assert_frame_equal(df, df_orig) + assert df.iloc[0, 0] == 0 def test_series_to_numpy(using_copy_on_write): @@ -157,11 +154,10 @@ def test_dataframe_array_ea_dtypes(using_copy_on_write): assert arr.flags.writeable is True -def test_dataframe_array_string_dtype(using_copy_on_write, using_array_manager): +def test_dataframe_array_string_dtype(using_copy_on_write): df = DataFrame({"a": ["a", "b"]}, dtype="string") arr = np.asarray(df) - if not using_array_manager: - assert np.shares_memory(arr, get_array(df, "a")) + assert np.shares_memory(arr, get_array(df, "a")) if using_copy_on_write: assert arr.flags.writeable is False else: diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index 1aa458a625028..c325e49e8156e 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -339,16 +339,11 @@ def test_dataframe_from_dict_of_series_with_dtype(index): @pytest.mark.parametrize("copy", [False, None, True]) -def test_frame_from_numpy_array(using_copy_on_write, copy, using_array_manager): +def test_frame_from_numpy_array(using_copy_on_write, copy): arr = np.array([[1, 2], [3, 4]]) df = DataFrame(arr, copy=copy) - if ( - using_copy_on_write - and copy is not False - or copy is True - or (using_array_manager and copy is None) - ): + if using_copy_on_write and copy is not False or copy is True: assert not np.shares_memory(get_array(df, 0), arr) else: assert np.shares_memory(get_array(df, 0), arr) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 6f3850ab64daa..9afc98e558c11 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -140,15 +140,11 @@ def test_subset_row_slice(backend, using_copy_on_write, warn_copy_on_write): @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_subset_column_slice( - backend, using_copy_on_write, warn_copy_on_write, using_array_manager, dtype -): +def test_subset_column_slice(backend, using_copy_on_write, warn_copy_on_write, dtype): # Case: taking a subset of the columns of a DataFrame using a slice # + afterwards modifying the subset dtype_backend, DataFrame, _ = backend - single_block = ( - dtype == "int64" and dtype_backend == "numpy" - ) and not using_array_manager + single_block = dtype == "int64" and dtype_backend == "numpy" df = DataFrame( {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} ) @@ -176,7 +172,7 @@ def test_subset_column_slice( tm.assert_frame_equal(subset, expected) # original parent dataframe is not modified (also not for BlockManager case, # except for single block) - if not using_copy_on_write and (using_array_manager or single_block): + if not using_copy_on_write and single_block: df_orig.iloc[0, 1] = 0 tm.assert_frame_equal(df, df_orig) else: @@ -201,7 +197,6 @@ def test_subset_loc_rows_columns( dtype, row_indexer, column_indexer, - using_array_manager, using_copy_on_write, warn_copy_on_write, ): @@ -224,14 +219,7 @@ def test_subset_loc_rows_columns( mutate_parent = ( isinstance(row_indexer, slice) and isinstance(column_indexer, slice) - and ( - using_array_manager - or ( - dtype == "int64" - and dtype_backend == "numpy" - and not using_copy_on_write - ) - ) + and (dtype == "int64" and dtype_backend == "numpy" and not using_copy_on_write) ) # modifying the subset never modifies the parent @@ -265,7 +253,6 @@ def test_subset_iloc_rows_columns( dtype, row_indexer, column_indexer, - using_array_manager, using_copy_on_write, warn_copy_on_write, ): @@ -288,14 +275,7 @@ def test_subset_iloc_rows_columns( mutate_parent = ( isinstance(row_indexer, slice) and isinstance(column_indexer, slice) - and ( - using_array_manager - or ( - dtype == "int64" - and dtype_backend == "numpy" - and not using_copy_on_write - ) - ) + and (dtype == "int64" and dtype_backend == "numpy" and not using_copy_on_write) ) # modifying the subset never modifies the parent @@ -422,7 +402,7 @@ def test_subset_set_column(backend, using_copy_on_write, warn_copy_on_write): "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) def test_subset_set_column_with_loc( - backend, using_copy_on_write, warn_copy_on_write, using_array_manager, dtype + backend, using_copy_on_write, warn_copy_on_write, dtype ): # Case: setting a single column with loc on a viewing subset # -> subset.loc[:, col] = value @@ -440,10 +420,7 @@ def test_subset_set_column_with_loc( subset.loc[:, "a"] = np.array([10, 11], dtype="int64") else: with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning( - None, - raise_on_extra_warnings=not using_array_manager, - ): + with tm.assert_produces_warning(None): subset.loc[:, "a"] = np.array([10, 11], dtype="int64") subset._mgr._verify_integrity() @@ -461,9 +438,7 @@ def test_subset_set_column_with_loc( tm.assert_frame_equal(df, df_orig) -def test_subset_set_column_with_loc2( - backend, using_copy_on_write, warn_copy_on_write, using_array_manager -): +def test_subset_set_column_with_loc2(backend, using_copy_on_write, warn_copy_on_write): # Case: setting a single column with loc on a viewing subset # -> subset.loc[:, col] = value # separate test for case of DataFrame of a single column -> takes a separate @@ -480,10 +455,7 @@ def test_subset_set_column_with_loc2( subset.loc[:, "a"] = 0 else: with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning( - None, - raise_on_extra_warnings=not using_array_manager, - ): + with tm.assert_produces_warning(None): subset.loc[:, "a"] = 0 subset._mgr._verify_integrity() @@ -600,7 +572,6 @@ def test_subset_chained_getitem( method, dtype, using_copy_on_write, - using_array_manager, warn_copy_on_write, ): # Case: creating a subset using multiple, chained getitem calls using views @@ -614,17 +585,10 @@ def test_subset_chained_getitem( # when not using CoW, it depends on whether we have a single block or not # and whether we are slicing the columns -> in that case we have a view test_callspec = request.node.callspec.id - if not using_array_manager: - subset_is_view = test_callspec in ( - "numpy-single-block-column-iloc-slice", - "numpy-single-block-column-loc-slice", - ) - else: - # with ArrayManager, it doesn't matter whether we have - # single vs mixed block or numpy vs nullable dtypes - subset_is_view = test_callspec.endswith( - ("column-iloc-slice", "column-loc-slice") - ) + subset_is_view = test_callspec in ( + "numpy-single-block-column-iloc-slice", + "numpy-single-block-column-loc-slice", + ) # modify subset -> don't modify parent subset = method(df) @@ -726,9 +690,7 @@ def test_subset_chained_getitem_series( assert subset.iloc[0] == 0 -def test_subset_chained_single_block_row( - using_copy_on_write, using_array_manager, warn_copy_on_write -): +def test_subset_chained_single_block_row(using_copy_on_write, warn_copy_on_write): # not parametrizing this for dtype backend, since this explicitly tests single block df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) df_orig = df.copy() @@ -737,7 +699,7 @@ def test_subset_chained_single_block_row( subset = df[:].iloc[0].iloc[0:2] with tm.assert_cow_warning(warn_copy_on_write): subset.iloc[0] = 0 - if using_copy_on_write or using_array_manager: + if using_copy_on_write: tm.assert_frame_equal(df, df_orig) else: assert df.iloc[0, 0] == 0 @@ -747,7 +709,7 @@ def test_subset_chained_single_block_row( with tm.assert_cow_warning(warn_copy_on_write): df.iloc[0, 0] = 0 expected = Series([1, 4], index=["a", "b"], name=0) - if using_copy_on_write or using_array_manager: + if using_copy_on_write: tm.assert_series_equal(subset, expected) else: assert subset.iloc[0] == 0 @@ -967,9 +929,7 @@ def test_del_series(backend): # Accessing column as Series -def test_column_as_series( - backend, using_copy_on_write, warn_copy_on_write, using_array_manager -): +def test_column_as_series(backend, using_copy_on_write, warn_copy_on_write): # Case: selecting a single column now also uses Copy-on-Write dtype_backend, DataFrame, Series = backend df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) @@ -979,7 +939,7 @@ def test_column_as_series( assert np.shares_memory(get_array(s, "a"), get_array(df, "a")) - if using_copy_on_write or using_array_manager: + if using_copy_on_write: s[0] = 0 else: if warn_copy_on_write: @@ -1004,7 +964,7 @@ def test_column_as_series( def test_column_as_series_set_with_upcast( - backend, using_copy_on_write, using_array_manager, warn_copy_on_write + backend, using_copy_on_write, warn_copy_on_write ): # Case: selecting a single column now also uses Copy-on-Write -> when # setting a value causes an upcast, we don't need to update the parent @@ -1019,7 +979,7 @@ def test_column_as_series_set_with_upcast( with pytest.raises(TypeError, match="Invalid value"): s[0] = "foo" expected = Series([1, 2, 3], name="a") - elif using_copy_on_write or warn_copy_on_write or using_array_manager: + elif using_copy_on_write or warn_copy_on_write: # TODO(CoW-warn) assert the FutureWarning for CoW is also raised with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): s[0] = "foo" @@ -1063,7 +1023,6 @@ def test_column_as_series_no_item_cache( method, using_copy_on_write, warn_copy_on_write, - using_array_manager, ): # Case: selecting a single column (which now also uses Copy-on-Write to protect # the view) should always give a new object (i.e. not make use of a cache) @@ -1080,7 +1039,7 @@ def test_column_as_series_no_item_cache( else: assert s1 is s2 - if using_copy_on_write or using_array_manager: + if using_copy_on_write: s1.iloc[0] = 0 elif warn_copy_on_write: with tm.assert_cow_warning(): @@ -1181,9 +1140,7 @@ def test_series_midx_slice(using_copy_on_write, warn_copy_on_write): tm.assert_series_equal(ser, expected) -def test_getitem_midx_slice( - using_copy_on_write, warn_copy_on_write, using_array_manager -): +def test_getitem_midx_slice(using_copy_on_write, warn_copy_on_write): df = DataFrame({("a", "x"): [1, 2], ("a", "y"): 1, ("b", "x"): 2}) df_orig = df.copy() new_df = df[("a",)] @@ -1191,8 +1148,7 @@ def test_getitem_midx_slice( if using_copy_on_write: assert not new_df._mgr._has_no_reference(0) - if not using_array_manager: - assert np.shares_memory(get_array(df, ("a", "x")), get_array(new_df, "x")) + assert np.shares_memory(get_array(df, ("a", "x")), get_array(new_df, "x")) if using_copy_on_write: new_df.iloc[0, 0] = 100 tm.assert_frame_equal(df_orig, df) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 862aebdc70a9d..590829b6dc759 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -119,9 +119,7 @@ def test_copy_shallow(using_copy_on_write, warn_copy_on_write): "set_flags", ], ) -def test_methods_copy_keyword( - request, method, copy, using_copy_on_write, using_array_manager -): +def test_methods_copy_keyword(request, method, copy, using_copy_on_write): index = None if "to_timestamp" in request.node.callspec.id: index = period_range("2012-01-01", freq="D", periods=3) @@ -145,7 +143,7 @@ def test_methods_copy_keyword( if request.node.callspec.id.startswith("reindex-"): # TODO copy=False without CoW still returns a copy in this case - if not using_copy_on_write and not using_array_manager and copy is False: + if not using_copy_on_write and copy is False: share_memory = False if share_memory: @@ -227,11 +225,10 @@ def test_methods_series_copy_keyword(request, method, copy, using_copy_on_write) @pytest.mark.parametrize("copy", [True, None, False]) -def test_transpose_copy_keyword(using_copy_on_write, copy, using_array_manager): +def test_transpose_copy_keyword(using_copy_on_write, copy): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) result = df.transpose(copy=copy) share_memory = using_copy_on_write or copy is False or copy is None - share_memory = share_memory and not using_array_manager if share_memory: assert np.shares_memory(get_array(df, "a"), get_array(result, 0)) @@ -1718,11 +1715,8 @@ def test_get(using_copy_on_write, warn_copy_on_write, key): @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_xs( - using_copy_on_write, warn_copy_on_write, using_array_manager, axis, key, dtype -): - single_block = (dtype == "int64") and not using_array_manager - is_view = single_block or (using_array_manager and axis == 1) +def test_xs(using_copy_on_write, warn_copy_on_write, axis, key, dtype): + single_block = dtype == "int64" df = DataFrame( {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} ) @@ -1735,7 +1729,7 @@ def test_xs( elif using_copy_on_write: assert result._mgr._has_no_reference(0) - if using_copy_on_write or (is_view and not warn_copy_on_write): + if using_copy_on_write or (single_block and not warn_copy_on_write): result.iloc[0] = 0 elif warn_copy_on_write: with tm.assert_cow_warning(single_block or axis == 1): @@ -1753,9 +1747,7 @@ def test_xs( @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("key, level", [("l1", 0), (2, 1)]) -def test_xs_multiindex( - using_copy_on_write, warn_copy_on_write, using_array_manager, key, level, axis -): +def test_xs_multiindex(using_copy_on_write, warn_copy_on_write, key, level, axis): arr = np.arange(18).reshape(6, 3) index = MultiIndex.from_product([["l1", "l2"], [1, 2, 3]], names=["lev1", "lev2"]) df = DataFrame(arr, index=index, columns=list("abc")) @@ -1772,7 +1764,7 @@ def test_xs_multiindex( if warn_copy_on_write: warn = FutureWarning if level == 0 else None - elif not using_copy_on_write and not using_array_manager: + elif not using_copy_on_write: warn = SettingWithCopyWarning else: warn = None @@ -1884,12 +1876,12 @@ def test_inplace_arithmetic_series_with_reference( @pytest.mark.parametrize("copy", [True, False]) -def test_transpose(using_copy_on_write, copy, using_array_manager): +def test_transpose(using_copy_on_write, copy): df = DataFrame({"a": [1, 2, 3], "b": 1}) df_orig = df.copy() result = df.transpose(copy=copy) - if not copy and not using_array_manager or using_copy_on_write: + if not copy or using_copy_on_write: assert np.shares_memory(get_array(df, "a"), get_array(result, 0)) else: assert not np.shares_memory(get_array(df, "a"), get_array(result, 0)) diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index 6d16bc3083883..1a0a77b332743 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -118,7 +118,7 @@ def test_replace_mask_all_false_second_block(using_copy_on_write): # assert np.shares_memory(get_array(df, "d"), get_array(df2, "d")) -def test_replace_coerce_single_column(using_copy_on_write, using_array_manager): +def test_replace_coerce_single_column(using_copy_on_write): df = DataFrame({"a": [1.5, 2, 3], "b": 100.5}) df_orig = df.copy() @@ -128,7 +128,7 @@ def test_replace_coerce_single_column(using_copy_on_write, using_array_manager): assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - elif not using_array_manager: + else: assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index ca19845041e23..9dd0a2eba6c0d 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -397,10 +397,6 @@ def test_setitem_series(self, data, full_indexer): def test_setitem_frame_2d_values(self, data): # GH#44514 df = pd.DataFrame({"A": data}) - - # Avoiding using_array_manager fixture - # https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410 - using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager) using_copy_on_write = pd.options.mode.copy_on_write blk_data = df._mgr.arrays[0] @@ -415,7 +411,7 @@ def test_setitem_frame_2d_values(self, data): df.iloc[:] = df.values tm.assert_frame_equal(df, orig) - if not using_array_manager and not using_copy_on_write: + if not using_copy_on_write: # GH#33457 Check that this setting occurred in-place # FIXME(ArrayManager): this should work there too assert df._mgr.arrays[0] is blk_data diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 97e7ae15c6c63..7837adec0c9e0 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -739,7 +739,7 @@ def test_getitem_setitem_boolean_multi(self): expected.loc[[0, 2], [1]] = 5 tm.assert_frame_equal(df, expected) - def test_getitem_setitem_float_labels(self, using_array_manager): + def test_getitem_setitem_float_labels(self): index = Index([1.5, 2, 3, 4, 5]) df = DataFrame(np.random.default_rng(2).standard_normal((5, 5)), index=index) @@ -1110,16 +1110,14 @@ def test_iloc_col(self): expected = df.reindex(columns=df.columns[[1, 2, 4, 6]]) tm.assert_frame_equal(result, expected) - def test_iloc_col_slice_view( - self, using_array_manager, using_copy_on_write, warn_copy_on_write - ): + def test_iloc_col_slice_view(self, using_copy_on_write, warn_copy_on_write): df = DataFrame( np.random.default_rng(2).standard_normal((4, 10)), columns=range(0, 20, 2) ) original = df.copy() subset = df.iloc[:, slice(4, 8)] - if not using_array_manager and not using_copy_on_write: + if not using_copy_on_write: # verify slice is view assert np.shares_memory(df[8]._values, subset[8]._values) @@ -1617,7 +1615,7 @@ def test_setitem(self): ) -def test_object_casting_indexing_wraps_datetimelike(using_array_manager): +def test_object_casting_indexing_wraps_datetimelike(): # GH#31649, check the indexing methods all the way down the stack df = DataFrame( { @@ -1639,10 +1637,6 @@ def test_object_casting_indexing_wraps_datetimelike(using_array_manager): assert isinstance(ser.values[1], Timestamp) assert isinstance(ser.values[2], pd.Timedelta) - if using_array_manager: - # remainder of the test checking BlockManager internals - return - mgr = df._mgr mgr._rebuild_blknos_and_blklocs() arr = mgr.fast_xs(0).array diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py index 7e702bdc993bd..b9fc5dc195026 100644 --- a/pandas/tests/frame/indexing/test_insert.py +++ b/pandas/tests/frame/indexing/test_insert.py @@ -71,15 +71,10 @@ def test_insert_with_columns_dups(self): ) tm.assert_frame_equal(df, exp) - def test_insert_item_cache(self, using_array_manager, using_copy_on_write): + def test_insert_item_cache(self, using_copy_on_write): df = DataFrame(np.random.default_rng(2).standard_normal((4, 3))) ser = df[0] - - if using_array_manager: - expected_warning = None - else: - # with BlockManager warn about high fragmentation of single dtype - expected_warning = PerformanceWarning + expected_warning = PerformanceWarning with tm.assert_produces_warning(expected_warning): for n in range(100): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index e802a56ecbc81..f031cb2218e31 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1208,9 +1208,7 @@ def test_setitem_always_copy(self, float_frame): assert notna(s[5:10]).all() @pytest.mark.parametrize("consolidate", [True, False]) - def test_setitem_partial_column_inplace( - self, consolidate, using_array_manager, using_copy_on_write - ): + def test_setitem_partial_column_inplace(self, consolidate, using_copy_on_write): # This setting should be in-place, regardless of whether frame is # single-block or multi-block # GH#304 this used to be incorrectly not-inplace, in which case @@ -1220,12 +1218,11 @@ def test_setitem_partial_column_inplace( {"x": [1.1, 2.1, 3.1, 4.1], "y": [5.1, 6.1, 7.1, 8.1]}, index=[0, 1, 2, 3] ) df.insert(2, "z", np.nan) - if not using_array_manager: - if consolidate: - df._consolidate_inplace() - assert len(df._mgr.blocks) == 1 - else: - assert len(df._mgr.blocks) == 2 + if consolidate: + df._consolidate_inplace() + assert len(df._mgr.blocks) == 1 + else: + assert len(df._mgr.blocks) == 2 zvals = df["z"]._values @@ -1254,7 +1251,7 @@ def test_setitem_duplicate_columns_not_inplace(self): @pytest.mark.parametrize( "value", [1, np.array([[1], [1]], dtype="int64"), [[1], [1]]] ) - def test_setitem_same_dtype_not_inplace(self, value, using_array_manager): + def test_setitem_same_dtype_not_inplace(self, value): # GH#39510 cols = ["A", "B"] df = DataFrame(0, index=[0, 1], columns=cols) diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index be809e3a17c8e..535137edd16cf 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -122,9 +122,7 @@ def test_xs_keep_level(self): result = df.xs((2008, "sat"), level=["year", "day"], drop_level=False) tm.assert_frame_equal(result, expected) - def test_xs_view( - self, using_array_manager, using_copy_on_write, warn_copy_on_write - ): + def test_xs_view(self, using_copy_on_write, warn_copy_on_write): # in 0.14 this will return a view if possible a copy otherwise, but # this is numpy dependent @@ -135,13 +133,6 @@ def test_xs_view( with tm.raises_chained_assignment_error(): dm.xs(2)[:] = 20 tm.assert_frame_equal(dm, df_orig) - elif using_array_manager: - # INFO(ArrayManager) with ArrayManager getting a row as a view is - # not possible - msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(SettingWithCopyError, match=msg): - dm.xs(2)[:] = 20 - assert not (dm.xs(2) == 20).any() else: with tm.raises_chained_assignment_error(): dm.xs(2)[:] = 20 @@ -400,9 +391,7 @@ def test_xs_droplevel_false(self): expected = DataFrame({"a": [1]}) tm.assert_frame_equal(result, expected) - def test_xs_droplevel_false_view( - self, using_array_manager, using_copy_on_write, warn_copy_on_write - ): + def test_xs_droplevel_false_view(self, using_copy_on_write, warn_copy_on_write): # GH#37832 df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"])) result = df.xs("a", axis=1, drop_level=False) @@ -427,9 +416,6 @@ def test_xs_droplevel_false_view( if using_copy_on_write: # with copy on write the subset is never modified expected = DataFrame({"a": [1]}) - elif using_array_manager: - # Here the behavior is consistent - expected = DataFrame({"a": [2]}) else: # FIXME: iloc does not update the array inplace using # "split" path diff --git a/pandas/tests/frame/methods/test_equals.py b/pandas/tests/frame/methods/test_equals.py index d0b9d96cafa0d..88b3fec02182b 100644 --- a/pandas/tests/frame/methods/test_equals.py +++ b/pandas/tests/frame/methods/test_equals.py @@ -14,11 +14,11 @@ def test_dataframe_not_equal(self): df2 = DataFrame({"a": ["s", "d"], "b": [1, 2]}) assert df1.equals(df2) is False - def test_equals_different_blocks(self, using_array_manager, using_infer_string): + def test_equals_different_blocks(self, using_infer_string): # GH#9330 df0 = DataFrame({"A": ["x", "y"], "B": [1, 2], "C": ["w", "z"]}) df1 = df0.reset_index()[["A", "B", "C"]] - if not using_array_manager and not using_infer_string: + if not using_infer_string: # this assert verifies that the above operations have # induced a block rearrangement assert df0._mgr.blocks[0].dtype != df1._mgr.blocks[0].dtype diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index e0641fcb65bd3..a93931a970687 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -52,12 +52,8 @@ def test_interpolate_datetimelike_values(self, frame_or_series): expected_td = frame_or_series(orig - orig[0]) tm.assert_equal(res_td, expected_td) - def test_interpolate_inplace(self, frame_or_series, using_array_manager, request): + def test_interpolate_inplace(self, frame_or_series, request): # GH#44749 - if using_array_manager and frame_or_series is DataFrame: - mark = pytest.mark.xfail(reason=".values-based in-place check is invalid") - request.applymarker(mark) - obj = frame_or_series([1, np.nan, 2]) orig = obj.values @@ -474,14 +470,8 @@ def test_interp_string_axis(self, axis_name, axis_number): @pytest.mark.parametrize("multiblock", [True, False]) @pytest.mark.parametrize("method", ["ffill", "bfill", "pad"]) - def test_interp_fillna_methods( - self, request, axis, multiblock, method, using_array_manager - ): + def test_interp_fillna_methods(self, request, axis, multiblock, method): # GH 12918 - if using_array_manager and axis in (1, "columns"): - # TODO(ArrayManager) support axis=1 - td.mark_array_manager_not_yet_implemented(request) - df = DataFrame( { "A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0], diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 0f27eae1a3bfc..e31e29b1b0cb2 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -45,9 +45,7 @@ def test_quantile_sparse(self, df, expected): expected = expected.astype("Sparse[float]") tm.assert_series_equal(result, expected) - def test_quantile( - self, datetime_frame, interp_method, using_array_manager, request - ): + def test_quantile(self, datetime_frame, interp_method, request): interpolation, method = interp_method df = datetime_frame result = df.quantile( @@ -63,11 +61,6 @@ def test_quantile( tm.assert_series_equal(result, expected) else: tm.assert_index_equal(result.index, expected.index) - request.applymarker( - pytest.mark.xfail( - using_array_manager, reason="Name set incorrectly for arraymanager" - ) - ) assert result.name == expected.name result = df.quantile( @@ -83,11 +76,6 @@ def test_quantile( tm.assert_series_equal(result, expected) else: tm.assert_index_equal(result.index, expected.index) - request.applymarker( - pytest.mark.xfail( - using_array_manager, reason="Name set incorrectly for arraymanager" - ) - ) assert result.name == expected.name def test_empty(self, interp_method): @@ -97,7 +85,7 @@ def test_empty(self, interp_method): ) assert np.isnan(q["x"]) and np.isnan(q["y"]) - def test_non_numeric_exclusion(self, interp_method, request, using_array_manager): + def test_non_numeric_exclusion(self, interp_method, request): interpolation, method = interp_method df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]}) rs = df.quantile( @@ -106,11 +94,9 @@ def test_non_numeric_exclusion(self, interp_method, request, using_array_manager xp = df.median(numeric_only=True).rename(0.5) if interpolation == "nearest": xp = (xp + 0.5).astype(np.int64) - if method == "table" and using_array_manager: - request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set.")) tm.assert_series_equal(rs, xp) - def test_axis(self, interp_method, request, using_array_manager): + def test_axis(self, interp_method): # axis interpolation, method = interp_method df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) @@ -118,8 +104,6 @@ def test_axis(self, interp_method, request, using_array_manager): expected = Series([1.5, 2.5, 3.5], index=[1, 2, 3], name=0.5) if interpolation == "nearest": expected = expected.astype(np.int64) - if method == "table" and using_array_manager: - request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set.")) tm.assert_series_equal(result, expected) result = df.quantile( @@ -134,7 +118,7 @@ def test_axis(self, interp_method, request, using_array_manager): expected = expected.astype(np.int64) tm.assert_frame_equal(result, expected, check_index_type=True) - def test_axis_numeric_only_true(self, interp_method, request, using_array_manager): + def test_axis_numeric_only_true(self, interp_method): # We may want to break API in the future to change this # so that we exclude non-numeric along the same axis # See GH #7312 @@ -146,11 +130,9 @@ def test_axis_numeric_only_true(self, interp_method, request, using_array_manage expected = Series([3.0, 4.0], index=[0, 1], name=0.5) if interpolation == "nearest": expected = expected.astype(np.int64) - if method == "table" and using_array_manager: - request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set.")) tm.assert_series_equal(result, expected) - def test_quantile_date_range(self, interp_method, request, using_array_manager): + def test_quantile_date_range(self, interp_method): # GH 2460 interpolation, method = interp_method dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") @@ -163,12 +145,10 @@ def test_quantile_date_range(self, interp_method, request, using_array_manager): expected = Series( ["2016-01-02 00:00:00"], name=0.5, dtype="datetime64[ns, US/Pacific]" ) - if method == "table" and using_array_manager: - request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set.")) tm.assert_series_equal(result, expected) - def test_quantile_axis_mixed(self, interp_method, request, using_array_manager): + def test_quantile_axis_mixed(self, interp_method): # mixed on axis=1 interpolation, method = interp_method df = DataFrame( @@ -185,8 +165,6 @@ def test_quantile_axis_mixed(self, interp_method, request, using_array_manager): expected = Series([1.5, 2.5, 3.5], name=0.5) if interpolation == "nearest": expected -= 0.5 - if method == "table" and using_array_manager: - request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set.")) tm.assert_series_equal(result, expected) # must raise @@ -194,11 +172,9 @@ def test_quantile_axis_mixed(self, interp_method, request, using_array_manager): with pytest.raises(TypeError, match=msg): df.quantile(0.5, axis=1, numeric_only=False) - def test_quantile_axis_parameter(self, interp_method, request, using_array_manager): + def test_quantile_axis_parameter(self, interp_method): # GH 9543/9544 interpolation, method = interp_method - if method == "table" and using_array_manager: - request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set.")) df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) result = df.quantile(0.5, axis=0, interpolation=interpolation, method=method) @@ -312,7 +288,7 @@ def test_quantile_interpolation_int(self, int_frame): assert q1["A"] == np.percentile(df["A"], 10) tm.assert_series_equal(q, q1) - def test_quantile_multi(self, interp_method, request, using_array_manager): + def test_quantile_multi(self, interp_method): interpolation, method = interp_method df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"]) result = df.quantile([0.25, 0.5], interpolation=interpolation, method=method) @@ -323,11 +299,9 @@ def test_quantile_multi(self, interp_method, request, using_array_manager): ) if interpolation == "nearest": expected = expected.astype(np.int64) - if method == "table" and using_array_manager: - request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set.")) tm.assert_frame_equal(result, expected) - def test_quantile_multi_axis_1(self, interp_method, request, using_array_manager): + def test_quantile_multi_axis_1(self, interp_method): interpolation, method = interp_method df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"]) result = df.quantile( @@ -338,8 +312,6 @@ def test_quantile_multi_axis_1(self, interp_method, request, using_array_manager ) if interpolation == "nearest": expected = expected.astype(np.int64) - if method == "table" and using_array_manager: - request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set.")) tm.assert_frame_equal(result, expected) def test_quantile_multi_empty(self, interp_method): @@ -443,10 +415,8 @@ def test_quantile_invalid(self, invalid, datetime_frame, interp_method): with pytest.raises(ValueError, match=msg): datetime_frame.quantile(invalid, interpolation=interpolation, method=method) - def test_quantile_box(self, interp_method, request, using_array_manager): + def test_quantile_box(self, interp_method): interpolation, method = interp_method - if method == "table" and using_array_manager: - request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set.")) df = DataFrame( { "A": [ @@ -574,10 +544,8 @@ def test_quantile_box_nat(self): ) tm.assert_frame_equal(res, exp) - def test_quantile_nan(self, interp_method, request, using_array_manager): + def test_quantile_nan(self, interp_method): interpolation, method = interp_method - if method == "table" and using_array_manager: - request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set.")) # GH 14357 - float block where some cols have missing values df = DataFrame({"a": np.arange(1, 6.0), "b": np.arange(1, 6.0)}) df.iloc[-1, 1] = np.nan @@ -621,10 +589,8 @@ def test_quantile_nan(self, interp_method, request, using_array_manager): exp = DataFrame({"a": [3.0, 4.0], "b": [np.nan, np.nan]}, index=[0.5, 0.75]) tm.assert_frame_equal(res, exp) - def test_quantile_nat(self, interp_method, request, using_array_manager, unit): + def test_quantile_nat(self, interp_method, unit): interpolation, method = interp_method - if method == "table" and using_array_manager: - request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set.")) # full NaT column df = DataFrame({"a": [pd.NaT, pd.NaT, pd.NaT]}, dtype=f"M8[{unit}]") @@ -757,9 +723,7 @@ def test_quantile_empty_no_columns(self, interp_method): expected.columns.name = "captain tightpants" tm.assert_frame_equal(result, expected) - def test_quantile_item_cache( - self, using_array_manager, interp_method, using_copy_on_write - ): + def test_quantile_item_cache(self, interp_method, using_copy_on_write): # previous behavior incorrect retained an invalid _item_cache entry interpolation, method = interp_method df = DataFrame( @@ -767,8 +731,7 @@ def test_quantile_item_cache( ) df["D"] = df["A"] * 2 ser = df["A"] - if not using_array_manager: - assert len(df._mgr.blocks) == 2 + assert len(df._mgr.blocks) == 2 df.quantile(numeric_only=False, interpolation=interpolation, method=method) diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index b21aa2d687682..907ff67eac7a1 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -423,13 +423,12 @@ def test_shift_duplicate_columns(self): tm.assert_frame_equal(shifted[0], shifted[1]) tm.assert_frame_equal(shifted[0], shifted[2]) - def test_shift_axis1_multiple_blocks(self, using_array_manager): + def test_shift_axis1_multiple_blocks(self): # GH#35488 df1 = DataFrame(np.random.default_rng(2).integers(1000, size=(5, 3))) df2 = DataFrame(np.random.default_rng(2).integers(1000, size=(5, 2))) df3 = pd.concat([df1, df2], axis=1) - if not using_array_manager: - assert len(df3._mgr.blocks) == 2 + assert len(df3._mgr.blocks) == 2 result = df3.shift(2, axis=1) @@ -449,8 +448,7 @@ def test_shift_axis1_multiple_blocks(self, using_array_manager): # Case with periods < 0 # rebuild df3 because `take` call above consolidated df3 = pd.concat([df1, df2], axis=1) - if not using_array_manager: - assert len(df3._mgr.blocks) == 2 + assert len(df3._mgr.blocks) == 2 result = df3.shift(-2, axis=1) expected = df3.take([2, 3, 4, -1, -1], axis=1) diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index f2f02058a534e..be75efcdfe9d3 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -598,15 +598,14 @@ def test_sort_values_nat_na_position_default(self): result = expected.sort_values(["A", "date"]) tm.assert_frame_equal(result, expected) - def test_sort_values_item_cache(self, using_array_manager, using_copy_on_write): + def test_sort_values_item_cache(self, using_copy_on_write): # previous behavior incorrect retained an invalid _item_cache entry df = DataFrame( np.random.default_rng(2).standard_normal((4, 3)), columns=["A", "B", "C"] ) df["D"] = df["A"] * 2 ser = df["A"] - if not using_array_manager: - assert len(df._mgr.blocks) == 2 + assert len(df._mgr.blocks) == 2 df.sort_values(by="A") diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 42ce658701355..ecaf826c46d9b 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -13,8 +13,6 @@ from pandas._config import using_pyarrow_string_dtype -import pandas.util._test_decorators as td - import pandas as pd from pandas import ( DataFrame, @@ -894,15 +892,11 @@ def test_df_add_2d_array_collike_broadcasts(self): tm.assert_frame_equal(result, expected) def test_df_arith_2d_array_rowlike_broadcasts( - self, request, all_arithmetic_operators, using_array_manager + self, request, all_arithmetic_operators ): # GH#23000 opname = all_arithmetic_operators - if using_array_manager and opname in ("__rmod__", "__rfloordiv__"): - # TODO(ArrayManager) decide on dtypes - td.mark_array_manager_not_yet_implemented(request) - arr = np.arange(6).reshape(3, 2) df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) @@ -921,15 +915,11 @@ def test_df_arith_2d_array_rowlike_broadcasts( tm.assert_frame_equal(result, expected) def test_df_arith_2d_array_collike_broadcasts( - self, request, all_arithmetic_operators, using_array_manager + self, request, all_arithmetic_operators ): # GH#23000 opname = all_arithmetic_operators - if using_array_manager and opname in ("__rmod__", "__rfloordiv__"): - # TODO(ArrayManager) decide on dtypes - td.mark_array_manager_not_yet_implemented(request) - arr = np.arange(6).reshape(3, 2) df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 6e818d79d5ba8..8ff69472ea113 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -84,16 +84,15 @@ def test_constructor_from_ndarray_with_str_dtype(self): expected = DataFrame(arr.astype(str), dtype=object) tm.assert_frame_equal(df, expected) - def test_constructor_from_2d_datetimearray(self, using_array_manager): + def test_constructor_from_2d_datetimearray(self): dti = date_range("2016-01-01", periods=6, tz="US/Pacific") dta = dti._data.reshape(3, 2) df = DataFrame(dta) expected = DataFrame({0: dta[:, 0], 1: dta[:, 1]}) tm.assert_frame_equal(df, expected) - if not using_array_manager: - # GH#44724 big performance hit if we de-consolidate - assert len(df._mgr.blocks) == 1 + # GH#44724 big performance hit if we de-consolidate + assert len(df._mgr.blocks) == 1 def test_constructor_dict_with_tzaware_scalar(self): # GH#42505 @@ -310,10 +309,10 @@ def test_constructor_dtype_nocast_view_dataframe( assert df.values[0, 0] == 99 def test_constructor_dtype_nocast_view_2d_array( - self, using_array_manager, using_copy_on_write, warn_copy_on_write + self, using_copy_on_write, warn_copy_on_write ): df = DataFrame([[1, 2], [3, 4]], dtype="int64") - if not using_array_manager and not using_copy_on_write: + if not using_copy_on_write: should_be_view = DataFrame(df.values, dtype=df[0].dtype) # TODO(CoW-warn) this should warn # with tm.assert_cow_warning(warn_copy_on_write): @@ -2147,35 +2146,19 @@ def test_constructor_frame_shallow_copy(self, float_frame): cop.index = np.arange(len(cop)) tm.assert_frame_equal(float_frame, orig) - def test_constructor_ndarray_copy( - self, float_frame, using_array_manager, using_copy_on_write - ): - if not using_array_manager: - arr = float_frame.values.copy() - df = DataFrame(arr) - - arr[5] = 5 - if using_copy_on_write: - assert not (df.values[5] == 5).all() - else: - assert (df.values[5] == 5).all() + def test_constructor_ndarray_copy(self, float_frame, using_copy_on_write): + arr = float_frame.values.copy() + df = DataFrame(arr) - df = DataFrame(arr, copy=True) - arr[6] = 6 - assert not (df.values[6] == 6).all() + arr[5] = 5 + if using_copy_on_write: + assert not (df.values[5] == 5).all() else: - arr = float_frame.values.copy() - # default: copy to ensure contiguous arrays - df = DataFrame(arr) - assert df._mgr.arrays[0].flags.c_contiguous - arr[0, 0] = 100 - assert df.iloc[0, 0] != 100 - - # manually specify copy=False - df = DataFrame(arr, copy=False) - assert not df._mgr.arrays[0].flags.c_contiguous - arr[0, 0] = 1000 - assert df.iloc[0, 0] == 1000 + assert (df.values[5] == 5).all() + + df = DataFrame(arr, copy=True) + arr[6] = 6 + assert not (df.values[6] == 6).all() def test_constructor_series_copy(self, float_frame): series = float_frame._series @@ -2328,15 +2311,10 @@ def test_check_dtype_empty_numeric_column(self, dtype): @pytest.mark.parametrize( "dtype", tm.STRING_DTYPES + tm.BYTES_DTYPES + tm.OBJECT_DTYPES ) - def test_check_dtype_empty_string_column(self, request, dtype, using_array_manager): + def test_check_dtype_empty_string_column(self, request, dtype): # GH24386: Ensure dtypes are set correctly for an empty DataFrame. # Empty DataFrame is generated via dictionary data with non-overlapping columns. data = DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype) - - if using_array_manager and dtype in tm.BYTES_DTYPES: - # TODO(ArrayManager) astype to bytes dtypes does not yet give object dtype - td.mark_array_manager_not_yet_implemented(request) - assert data.b.dtype.name == "object" def test_to_frame_with_falsey_names(self): @@ -2515,17 +2493,8 @@ def test_dict_nocopy( copy, any_numeric_ea_dtype, any_numpy_dtype, - using_array_manager, using_copy_on_write, ): - if ( - using_array_manager - and not copy - and any_numpy_dtype not in tm.STRING_DTYPES + tm.BYTES_DTYPES - ): - # TODO(ArrayManager) properly honor copy keyword for dict input - td.mark_array_manager_not_yet_implemented(request) - a = np.array([1, 2], dtype=any_numpy_dtype) b = np.array([3, 4], dtype=any_numpy_dtype) if b.dtype.kind in ["S", "U"]: diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 34f172e900ab7..1e9aa2325e880 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -284,7 +284,7 @@ def test_multi_dtype2(self): expected = DataFrame([[1, 2, "foo", "bar"]], columns=["a", "a.1", "a.2", "a.3"]) tm.assert_frame_equal(df, expected) - def test_dups_across_blocks(self, using_array_manager): + def test_dups_across_blocks(self): # dups across blocks df_float = DataFrame( np.random.default_rng(2).standard_normal((10, 3)), dtype="float64" @@ -299,9 +299,8 @@ def test_dups_across_blocks(self, using_array_manager): ) df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1) - if not using_array_manager: - assert len(df._mgr.blknos) == len(df.columns) - assert len(df._mgr.blklocs) == len(df.columns) + assert len(df._mgr.blknos) == len(df.columns) + assert len(df._mgr.blklocs) == len(df.columns) # testing iloc for i in range(len(df.columns)): diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 66145c32c18d7..512b5d6ace469 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -817,17 +817,8 @@ def test_std_timedelta64_skipna_false(self): @pytest.mark.parametrize( "values", [["2022-01-01", "2022-01-02", pd.NaT, "2022-01-03"], 4 * [pd.NaT]] ) - def test_std_datetime64_with_nat( - self, values, skipna, using_array_manager, request, unit - ): + def test_std_datetime64_with_nat(self, values, skipna, request, unit): # GH#51335 - if using_array_manager and ( - not skipna or all(value is pd.NaT for value in values) - ): - mark = pytest.mark.xfail( - reason="GH#51446: Incorrect type inference on NaT in reduction result" - ) - request.applymarker(mark) dti = to_datetime(values).as_unit(unit) df = DataFrame({"a": dti}) result = df.std(skipna=skipna) @@ -1926,14 +1917,8 @@ def test_df_empty_nullable_min_count_1(self, opname, dtype, exp_dtype): tm.assert_series_equal(result, expected) -def test_sum_timedelta64_skipna_false(using_array_manager, request): +def test_sum_timedelta64_skipna_false(): # GH#17235 - if using_array_manager: - mark = pytest.mark.xfail( - reason="Incorrect type inference on NaT in reduction result" - ) - request.applymarker(mark) - arr = np.arange(8).astype(np.int64).view("m8[s]").reshape(4, 2) arr[-1, -1] = "Nat" diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 6e1e743eb60de..ea66290ab0417 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -72,13 +72,12 @@ def test_stack_mixed_level(self, future_stack): expected = expected[["a", "b"]] tm.assert_frame_equal(result, expected) - def test_unstack_not_consolidated(self, using_array_manager): + def test_unstack_not_consolidated(self): # Gh#34708 df = DataFrame({"x": [1, 2, np.nan], "y": [3.0, 4, np.nan]}) df2 = df[["x"]] df2["y"] = df["y"] - if not using_array_manager: - assert len(df2._mgr.blocks) == 2 + assert len(df2._mgr.blocks) == 2 res = df2.unstack() expected = df.unstack() @@ -969,7 +968,7 @@ def test_unstack_nan_index2(self): right = DataFrame(vals, columns=cols, index=idx) tm.assert_frame_equal(left, right) - def test_unstack_nan_index3(self, using_array_manager): + def test_unstack_nan_index3(self): # GH7401 df = DataFrame( { @@ -991,10 +990,6 @@ def test_unstack_nan_index3(self, using_array_manager): ) right = DataFrame(vals, columns=cols, index=idx) - if using_array_manager: - # INFO(ArrayManager) with ArrayManager preserve dtype where possible - cols = right.columns[[1, 2, 3, 5]] - right[cols] = right[cols].astype(df["C"].dtype) tm.assert_frame_equal(left, right) def test_unstack_nan_index4(self): @@ -1498,7 +1493,7 @@ def test_stack_positional_level_duplicate_column_names(future_stack): tm.assert_frame_equal(result, expected) -def test_unstack_non_slice_like_blocks(using_array_manager): +def test_unstack_non_slice_like_blocks(): # Case where the mgr_locs of a DataFrame's underlying blocks are not slice-like mi = MultiIndex.from_product([range(5), ["A", "B", "C"]]) @@ -1511,8 +1506,7 @@ def test_unstack_non_slice_like_blocks(using_array_manager): }, index=mi, ) - if not using_array_manager: - assert any(not x.mgr_locs.is_slice_like for x in df._mgr.blocks) + assert any(not x.mgr_locs.is_slice_like for x in df._mgr.blocks) res = df.unstack() @@ -2354,7 +2348,7 @@ def test_unstack_group_index_overflow(self, future_stack): result = s.unstack(4) assert result.shape == (500, 2) - def test_unstack_with_missing_int_cast_to_float(self, using_array_manager): + def test_unstack_with_missing_int_cast_to_float(self): # https://github.com/pandas-dev/pandas/issues/37115 df = DataFrame( { @@ -2366,8 +2360,7 @@ def test_unstack_with_missing_int_cast_to_float(self, using_array_manager): # add another int column to get 2 blocks df["is_"] = 1 - if not using_array_manager: - assert len(df._mgr.blocks) == 2 + assert len(df._mgr.blocks) == 2 result = df.unstack("b") result[("is_", "ca")] = result[("is_", "ca")].fillna(0) @@ -2380,10 +2373,6 @@ def test_unstack_with_missing_int_cast_to_float(self, using_array_manager): names=[None, "b"], ), ) - if using_array_manager: - # INFO(ArrayManager) with ArrayManager preserve dtype where possible - expected[("v", "cb")] = expected[("v", "cb")].astype("int64") - expected[("is_", "cb")] = expected[("is_", "cb")].astype("int64") tm.assert_frame_equal(result, expected) def test_unstack_with_level_has_nan(self): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4c903e691add1..3cc06ae4d2387 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2050,9 +2050,7 @@ def test_pivot_table_values_key_error(): @pytest.mark.parametrize( "op", ["idxmax", "idxmin", "min", "max", "sum", "prod", "skew"] ) -def test_empty_groupby( - columns, keys, values, method, op, using_array_manager, dropna, using_infer_string -): +def test_empty_groupby(columns, keys, values, method, op, dropna, using_infer_string): # GH8093 & GH26411 override_dtype = None diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 425079f943aba..8333dba439be9 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -362,7 +362,7 @@ def test_max_min_non_numeric(): assert "ss" in result -def test_max_min_object_multiple_columns(using_array_manager): +def test_max_min_object_multiple_columns(): # GH#41111 case where the aggregation is valid for some columns but not # others; we split object blocks column-wise, consistent with # DataFrame._reduce @@ -375,8 +375,7 @@ def test_max_min_object_multiple_columns(using_array_manager): } ) df._consolidate_inplace() # should already be consolidate, but double-check - if not using_array_manager: - assert len(df._mgr.blocks) == 2 + assert len(df._mgr.blocks) == 2 gb = df.groupby("A") diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index b97df376ac47f..ca796463f4a1e 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -213,7 +213,7 @@ def test_detect_chained_assignment(self, using_copy_on_write): @pytest.mark.arm_slow def test_detect_chained_assignment_raises( - self, using_array_manager, using_copy_on_write, warn_copy_on_write + self, using_copy_on_write, warn_copy_on_write ): # test with the chaining df = DataFrame( @@ -236,7 +236,7 @@ def test_detect_chained_assignment_raises( df["A"][0] = -5 with tm.raises_chained_assignment_error(): df["A"][1] = np.nan - elif not using_array_manager: + else: with pytest.raises(SettingWithCopyError, match=msg): with tm.raises_chained_assignment_error(): df["A"][0] = -5 @@ -246,14 +246,6 @@ def test_detect_chained_assignment_raises( df["A"][1] = np.nan assert df["A"]._is_copy is None - else: - # INFO(ArrayManager) for ArrayManager it doesn't matter that it's - # a mixed dataframe - df["A"][0] = -5 - df["A"][1] = -6 - expected = DataFrame([[-5, 2], [-6, 3]], columns=list("AB")) - expected["B"] = expected["B"].astype("float64") - tm.assert_frame_equal(df, expected) @pytest.mark.arm_slow def test_detect_chained_assignment_fails( @@ -297,7 +289,7 @@ def test_detect_chained_assignment_doc_example( @pytest.mark.arm_slow def test_detect_chained_assignment_object_dtype( - self, using_array_manager, using_copy_on_write, warn_copy_on_write + self, using_copy_on_write, warn_copy_on_write ): expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]}) df = DataFrame( @@ -317,18 +309,13 @@ def test_detect_chained_assignment_object_dtype( with tm.raises_chained_assignment_error(): df["A"][0] = 111 tm.assert_frame_equal(df, expected) - elif not using_array_manager: + else: with pytest.raises(SettingWithCopyError, match=msg): with tm.raises_chained_assignment_error(): df["A"][0] = 111 df.loc[0, "A"] = 111 tm.assert_frame_equal(df, expected) - else: - # INFO(ArrayManager) for ArrayManager it doesn't matter that it's - # a mixed dataframe - df["A"][0] = 111 - tm.assert_frame_equal(df, expected) @pytest.mark.arm_slow def test_detect_chained_assignment_is_copy_pickle(self): @@ -453,7 +440,7 @@ def test_detect_chained_assignment_undefined_column( @pytest.mark.arm_slow def test_detect_chained_assignment_changing_dtype( - self, using_array_manager, using_copy_on_write, warn_copy_on_write + self, using_copy_on_write, warn_copy_on_write ): # Mixed type setting but same dtype & changing dtype df = DataFrame( @@ -485,15 +472,9 @@ def test_detect_chained_assignment_changing_dtype( with pytest.raises(SettingWithCopyError, match=msg): df.loc[2]["C"] = "foo" - if not using_array_manager: - with pytest.raises(SettingWithCopyError, match=msg): - with tm.raises_chained_assignment_error(): - df["C"][2] = "foo" - else: - # INFO(ArrayManager) for ArrayManager it doesn't matter if it's - # changing the dtype or not - df["C"][2] = "foo" - assert df.loc[2, "C"] == "foo" + with pytest.raises(SettingWithCopyError, match=msg): + with tm.raises_chained_assignment_error(): + df["C"][2] = "foo" def test_setting_with_copy_bug(self, using_copy_on_write, warn_copy_on_write): # operating on a copy diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 409eca42f404b..a1d8577d534f5 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -72,13 +72,12 @@ class TestiLocBaseIndependent: ], ) @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) - def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manager): + def test_iloc_setitem_fullcol_categorical(self, indexer, key): frame = DataFrame({0: range(3)}, dtype=object) cat = Categorical(["alpha", "beta", "gamma"]) - if not using_array_manager: - assert frame._mgr.blocks[0]._can_hold_element(cat) + assert frame._mgr.blocks[0]._can_hold_element(cat) df = frame.copy() orig_vals = df.values @@ -86,8 +85,7 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage indexer(df)[key, 0] = cat expected = DataFrame({0: cat}).astype(object) - if not using_array_manager: - assert np.shares_memory(df[0].values, orig_vals) + assert np.shares_memory(df[0].values, orig_vals) tm.assert_frame_equal(df, expected) @@ -520,9 +518,7 @@ def test_iloc_setitem_dups(self): df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True) tm.assert_frame_equal(df, expected) - def test_iloc_setitem_frame_duplicate_columns_multiple_blocks( - self, using_array_manager - ): + def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(self): # Same as the "assign back to self" check in test_iloc_setitem_dups # but on a DataFrame with multiple blocks df = DataFrame([[0, 1], [2, 3]], columns=["B", "B"]) @@ -530,14 +526,12 @@ def test_iloc_setitem_frame_duplicate_columns_multiple_blocks( # setting float values that can be held by existing integer arrays # is inplace df.iloc[:, 0] = df.iloc[:, 0].astype("f8") - if not using_array_manager: - assert len(df._mgr.blocks) == 1 + assert len(df._mgr.blocks) == 1 # if the assigned values cannot be held by existing integer arrays, # we cast df.iloc[:, 0] = df.iloc[:, 0] + 0.5 - if not using_array_manager: - assert len(df._mgr.blocks) == 2 + assert len(df._mgr.blocks) == 2 expected = df.copy() @@ -632,7 +626,7 @@ def test_iloc_getitem_labelled_frame(self): with pytest.raises(ValueError, match=msg): df.iloc["j", "D"] - def test_iloc_getitem_doc_issue(self, using_array_manager): + def test_iloc_getitem_doc_issue(self): # multi axis slicing issue with single block # surfaced in GH 6059 @@ -662,8 +656,7 @@ def test_iloc_getitem_doc_issue(self, using_array_manager): columns = list(range(0, 8, 2)) df = DataFrame(arr, index=index, columns=columns) - if not using_array_manager: - df._mgr.blocks[0].mgr_locs + df._mgr.blocks[0].mgr_locs result = df.iloc[1:5, 2:4] expected = DataFrame(arr[1:5, 2:4], index=index[1:5], columns=columns[2:4]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 57f45f867254d..45ec968714aff 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -77,9 +77,7 @@ def test_setitem_ndarray_1d_2(self): "ignore:Series.__getitem__ treating keys as positions is deprecated:" "FutureWarning" ) - def test_getitem_ndarray_3d( - self, index, frame_or_series, indexer_sli, using_array_manager - ): + def test_getitem_ndarray_3d(self, index, frame_or_series, indexer_sli): # GH 25567 obj = gen_obj(frame_or_series, index) idxr = indexer_sli(obj) @@ -88,12 +86,8 @@ def test_getitem_ndarray_3d( msgs = [] if frame_or_series is Series and indexer_sli in [tm.setitem, tm.iloc]: msgs.append(r"Wrong number of dimensions. values.ndim > ndim \[3 > 1\]") - if using_array_manager: - msgs.append("Passed array should be 1-dimensional") if frame_or_series is Series or indexer_sli is tm.iloc: msgs.append(r"Buffer has wrong number of dimensions \(expected 1, got 3\)") - if using_array_manager: - msgs.append("indexer should be 1-dimensional") if indexer_sli is tm.loc or ( frame_or_series is Series and indexer_sli is tm.setitem ): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index fb0adc56c401b..da10555e60301 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1490,7 +1490,7 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture): result.loc[:, idxer] = expected tm.assert_frame_equal(result, expected) - def test_loc_setitem_time_key(self, using_array_manager): + def test_loc_setitem_time_key(self): index = date_range("2012-01-01", "2012-01-05", freq="30min") df = DataFrame( np.random.default_rng(2).standard_normal((len(index), 5)), index=index @@ -1505,9 +1505,6 @@ def test_loc_setitem_time_key(self, using_array_manager): result = result.loc[akey] expected = df.loc[akey].copy() expected.loc[:] = 0 - if using_array_manager: - # TODO(ArrayManager) we are still overwriting columns - expected = expected.astype(float) tm.assert_frame_equal(result, expected) result = df.copy() @@ -1520,9 +1517,6 @@ def test_loc_setitem_time_key(self, using_array_manager): result = result.loc[bkey] expected = df.loc[bkey].copy() expected.loc[:] = 0 - if using_array_manager: - # TODO(ArrayManager) we are still overwriting columns - expected = expected.astype(float) tm.assert_frame_equal(result, expected) result = df.copy() diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index ca551024b4c1f..b0a041ed5b69c 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -279,7 +279,7 @@ def test_partial_setting(self): s.iat[3] = 5.0 @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") - def test_partial_setting_frame(self, using_array_manager): + def test_partial_setting_frame(self): df_orig = DataFrame( np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64" ) @@ -292,8 +292,6 @@ def test_partial_setting_frame(self, using_array_manager): df.iloc[4, 2] = 5.0 msg = "index 2 is out of bounds for axis 0 with size 2" - if using_array_manager: - msg = "list index out of range" with pytest.raises(IndexError, match=msg): df.iat[4, 2] = 5.0 diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index e4b94177eedb2..8fc02cc7799ed 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1003,7 +1003,7 @@ def test_filter_row_groups(self, pa): result = read_parquet(path, pa, filters=[("a", "==", 0)]) assert len(result) == 1 - def test_read_parquet_manager(self, pa, using_array_manager): + def test_read_parquet_manager(self, pa): # ensure that read_parquet honors the pandas.options.mode.data_manager option df = pd.DataFrame( np.random.default_rng(2).standard_normal((10, 3)), columns=["A", "B", "C"] @@ -1012,10 +1012,7 @@ def test_read_parquet_manager(self, pa, using_array_manager): with tm.ensure_clean() as path: df.to_parquet(path, engine=pa) result = read_parquet(path, pa) - if using_array_manager: - assert isinstance(result._mgr, pd.core.internals.ArrayManager) - else: - assert isinstance(result._mgr, pd.core.internals.BlockManager) + assert isinstance(result._mgr, pd.core.internals.BlockManager) def test_read_dtype_backend_pyarrow_config(self, pa, df_full): import pyarrow diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py index 81ca227fb7afb..3fb6a3fb61396 100644 --- a/pandas/tests/reshape/concat/test_append.py +++ b/pandas/tests/reshape/concat/test_append.py @@ -328,16 +328,13 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self): result = df._append([ser, ser], ignore_index=True) tm.assert_frame_equal(result, expected) - def test_append_empty_tz_frame_with_datetime64ns(self, using_array_manager): + def test_append_empty_tz_frame_with_datetime64ns(self): # https://github.com/pandas-dev/pandas/issues/35460 df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") # pd.NaT gets inferred as tz-naive, so append result is tz-naive result = df._append({"a": pd.NaT}, ignore_index=True) - if using_array_manager: - expected = DataFrame({"a": [pd.NaT]}, dtype=object) - else: - expected = DataFrame({"a": [np.nan]}, dtype=object) + expected = DataFrame({"a": [np.nan]}, dtype=object) tm.assert_frame_equal(result, expected) # also test with typed value to append @@ -356,9 +353,7 @@ def test_append_empty_tz_frame_with_datetime64ns(self, using_array_manager): "dtype_str", ["datetime64[ns, UTC]", "datetime64[ns]", "Int64", "int64"] ) @pytest.mark.parametrize("val", [1, "NaT"]) - def test_append_empty_frame_with_timedelta64ns_nat( - self, dtype_str, val, using_array_manager - ): + def test_append_empty_frame_with_timedelta64ns_nat(self, dtype_str, val): # https://github.com/pandas-dev/pandas/issues/35460 df = DataFrame(columns=["a"]).astype(dtype_str) @@ -366,7 +361,7 @@ def test_append_empty_frame_with_timedelta64ns_nat( result = df._append(other, ignore_index=True) expected = other.astype(object) - if isinstance(val, str) and dtype_str != "int64" and not using_array_manager: + if isinstance(val, str) and dtype_str != "int64": # TODO: expected used to be `other.astype(object)` which is a more # reasonable result. This was changed when tightening # assert_frame_equal's treatment of mismatched NAs to match the diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 9e34d02091e69..2cc91992f1fd7 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -44,7 +44,7 @@ def test_append_concat(self): assert isinstance(result.index, PeriodIndex) assert result.index[0] == s1.index[0] - def test_concat_copy(self, using_array_manager, using_copy_on_write): + def test_concat_copy(self, using_copy_on_write): df = DataFrame(np.random.default_rng(2).standard_normal((4, 3))) df2 = DataFrame(np.random.default_rng(2).integers(0, 10, size=4).reshape(4, 1)) df3 = DataFrame({5: "foo"}, index=range(4)) @@ -72,18 +72,14 @@ def test_concat_copy(self, using_array_manager, using_copy_on_write): elif arr.dtype.kind in ["i", "u"]: assert arr.base is df2._mgr.arrays[0].base elif arr.dtype == object: - if using_array_manager: - # we get the same array object, which has no base - assert arr is df3._mgr.arrays[0] - else: - assert arr.base is not None + assert arr.base is not None # Float block was consolidated. df4 = DataFrame(np.random.default_rng(2).standard_normal((4, 1))) result = concat([df, df2, df3, df4], axis=1, copy=False) for arr in result._mgr.arrays: if arr.dtype.kind == "f": - if using_array_manager or using_copy_on_write: + if using_copy_on_write: # this is a view on some array in either df or df4 assert any( np.shares_memory(arr, other) diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 71ddff7438254..77485788faa02 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -214,9 +214,7 @@ def test_concat_NaT_dataframes(self, tz): @pytest.mark.parametrize("tz1", [None, "UTC"]) @pytest.mark.parametrize("tz2", [None, "UTC"]) @pytest.mark.parametrize("item", [pd.NaT, Timestamp("20150101")]) - def test_concat_NaT_dataframes_all_NaT_axis_0( - self, tz1, tz2, item, using_array_manager - ): + def test_concat_NaT_dataframes_all_NaT_axis_0(self, tz1, tz2, item): # GH 12396 # tz-naive @@ -228,7 +226,7 @@ def test_concat_NaT_dataframes_all_NaT_axis_0( expected = expected.apply(lambda x: x.dt.tz_localize(tz2)) if tz1 != tz2: expected = expected.astype(object) - if item is pd.NaT and not using_array_manager: + if item is pd.NaT: # GH#18463 # TODO: setting nan here is to keep the test passing as we # make assert_frame_equal stricter, but is nan really the @@ -567,7 +565,7 @@ def test_concat_multiindex_datetime_nat(): tm.assert_frame_equal(result, expected) -def test_concat_float_datetime64(using_array_manager): +def test_concat_float_datetime64(): # GH#32934 df_time = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")}) df_float = DataFrame({"A": pd.array([1.0], dtype="float64")}) @@ -592,15 +590,8 @@ def test_concat_float_datetime64(using_array_manager): result = concat([df_time.iloc[:0], df_float]) tm.assert_frame_equal(result, expected) - if not using_array_manager: - expected = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")}) - msg = "The behavior of DataFrame concatenation with empty or all-NA entries" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = concat([df_time, df_float.iloc[:0]]) - tm.assert_frame_equal(result, expected) - else: - expected = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")}).astype( - {"A": "object"} - ) + expected = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")}) + msg = "The behavior of DataFrame concatenation with empty or all-NA entries" + with tm.assert_produces_warning(FutureWarning, match=msg): result = concat([df_time, df_float.iloc[:0]]) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index d7a343ae9f152..9f832c7b1d1ca 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -316,7 +316,7 @@ def test_merge_copy(self): merged["d"] = "peekaboo" assert (right["d"] == "bar").all() - def test_merge_nocopy(self, using_array_manager): + def test_merge_nocopy(self): left = DataFrame({"a": 0, "b": 1}, index=range(10)) right = DataFrame({"c": "foo", "d": "bar"}, index=range(10)) @@ -702,7 +702,7 @@ def _constructor(self): assert isinstance(result, NotADataFrame) - def test_join_append_timedeltas(self, using_array_manager): + def test_join_append_timedeltas(self): # timedelta64 issues with join/merge # GH 5695 @@ -712,8 +712,6 @@ def test_join_append_timedeltas(self, using_array_manager): df = DataFrame(columns=list("dt")) msg = "The behavior of DataFrame concatenation with empty or all-NA entries" warn = FutureWarning - if using_array_manager: - warn = None with tm.assert_produces_warning(warn, match=msg): df = concat([df, d], ignore_index=True) result = concat([df, d], ignore_index=True) @@ -723,9 +721,6 @@ def test_join_append_timedeltas(self, using_array_manager): "t": [timedelta(0, 22500), timedelta(0, 22500)], } ) - if using_array_manager: - # TODO(ArrayManager) decide on exact casting rules in concat - expected = expected.astype(object) tm.assert_frame_equal(result, expected) def test_join_append_timedeltas2(self): diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 136e76986df9d..8a30b63cf0e17 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -459,7 +459,7 @@ def test_crosstab_normalize_arrays(self): ) tm.assert_frame_equal(test_case, norm_sum) - def test_crosstab_with_empties(self, using_array_manager): + def test_crosstab_with_empties(self): # Check handling of empties df = DataFrame( { @@ -484,9 +484,6 @@ def test_crosstab_with_empties(self, using_array_manager): index=Index([1, 2], name="a", dtype="int64"), columns=Index([3, 4], name="b"), ) - if using_array_manager: - # INFO(ArrayManager) column without NaNs can preserve int dtype - nans[3] = nans[3].astype("int64") calculated = crosstab(df.a, df.b, values=df.c, aggfunc="count", normalize=False) tm.assert_frame_equal(nans, calculated) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 18a449b4d0c67..bf2717be4d7ae 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1277,7 +1277,7 @@ def test_pivot_table_with_margins_set_margin_name(self, margin_name, data): margins_name=margin_name, ) - def test_pivot_timegrouper(self, using_array_manager): + def test_pivot_timegrouper(self): df = DataFrame( { "Branch": "A A A A A A A B".split(), @@ -1331,9 +1331,6 @@ def test_pivot_timegrouper(self, using_array_manager): ) expected.index.name = "Date" expected.columns.name = "Buyer" - if using_array_manager: - # INFO(ArrayManager) column without NaNs can preserve int dtype - expected["Carl"] = expected["Carl"].astype("int64") result = pivot_table( df, @@ -2370,7 +2367,7 @@ def test_pivot_table_datetime_warning(self): ) tm.assert_frame_equal(result, expected) - def test_pivot_table_with_mixed_nested_tuples(self, using_array_manager): + def test_pivot_table_with_mixed_nested_tuples(self): # GH 50342 df = DataFrame( { @@ -2434,9 +2431,6 @@ def test_pivot_table_with_mixed_nested_tuples(self, using_array_manager): [["bar", "bar", "foo", "foo"], ["one", "two"] * 2], names=["A", "B"] ), ) - if using_array_manager: - # INFO(ArrayManager) column without NaNs can preserve int dtype - expected["small"] = expected["small"].astype("int64") tm.assert_frame_equal(result, expected) def test_pivot_table_aggfunc_nunique_with_different_values(self): diff --git a/pandas/tests/reshape/test_pivot_multilevel.py b/pandas/tests/reshape/test_pivot_multilevel.py index 08ef29440825f..2c9d54c3db72c 100644 --- a/pandas/tests/reshape/test_pivot_multilevel.py +++ b/pandas/tests/reshape/test_pivot_multilevel.py @@ -197,7 +197,7 @@ def test_pivot_list_like_columns( tm.assert_frame_equal(result, expected) -def test_pivot_multiindexed_rows_and_cols(using_array_manager): +def test_pivot_multiindexed_rows_and_cols(): # GH 36360 df = pd.DataFrame( @@ -225,9 +225,7 @@ def test_pivot_multiindexed_rows_and_cols(using_array_manager): ), index=Index([0, 1], dtype="int64", name="idx_L0"), ) - if not using_array_manager: - # BlockManager does not preserve the dtypes - expected = expected.astype("float64") + expected = expected.astype("float64") tm.assert_frame_equal(res, expected) diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 6f0c8d751a92a..1a4a390da1323 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -318,7 +318,7 @@ def test_reindex_fill_value(): @td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) @pytest.mark.parametrize("fill_value", ["string", 0, Timedelta(0)]) -def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value, using_array_manager): +def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value): # https://github.com/pandas-dev/pandas/issues/42921 if dtype == "timedelta64[ns]" and fill_value == Timedelta(0): # use the scalar that is not compatible with the dtype for this test diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index da069afe5e709..866bfb995a6d5 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -2187,13 +2187,12 @@ def test_series_constructor_infer_multiindex(self, container, data): class TestSeriesConstructorInternals: - def test_constructor_no_pandas_array(self, using_array_manager): + def test_constructor_no_pandas_array(self): ser = Series([1, 2, 3]) result = Series(ser.array) tm.assert_series_equal(ser, result) - if not using_array_manager: - assert isinstance(result._mgr.blocks[0], NumpyBlock) - assert result._mgr.blocks[0].is_numeric + assert isinstance(result._mgr.blocks[0], NumpyBlock) + assert result._mgr.blocks[0].is_numeric @td.skip_array_manager_invalid_test def test_from_array(self): diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index 76353ab25fca6..e200f7d9933aa 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -163,7 +163,7 @@ def test_validate_stat_keepdims(): np.sum(ser, keepdims=True) -def test_mean_with_convertible_string_raises(using_array_manager, using_infer_string): +def test_mean_with_convertible_string_raises(using_infer_string): # GH#44008 ser = Series(["1", "2"]) if using_infer_string: @@ -177,19 +177,15 @@ def test_mean_with_convertible_string_raises(using_array_manager, using_infer_st ser.mean() df = ser.to_frame() - if not using_array_manager: - msg = r"Could not convert \['12'\] to numeric|does not support" + msg = r"Could not convert \['12'\] to numeric|does not support" with pytest.raises(TypeError, match=msg): df.mean() -def test_mean_dont_convert_j_to_complex(using_array_manager): +def test_mean_dont_convert_j_to_complex(): # GH#36703 df = pd.DataFrame([{"db": "J", "numeric": 123}]) - if using_array_manager: - msg = "Could not convert string 'J' to numeric" - else: - msg = r"Could not convert \['J'\] to numeric|does not support" + msg = r"Could not convert \['J'\] to numeric|does not support" with pytest.raises(TypeError, match=msg): df.mean() @@ -204,15 +200,14 @@ def test_mean_dont_convert_j_to_complex(using_array_manager): np.mean(df["db"].astype("string").array) -def test_median_with_convertible_string_raises(using_array_manager): +def test_median_with_convertible_string_raises(): # GH#34671 this _could_ return a string "2", but definitely not float 2.0 msg = r"Cannot convert \['1' '2' '3'\] to numeric|does not support" ser = Series(["1", "2", "3"]) with pytest.raises(TypeError, match=msg): ser.median() - if not using_array_manager: - msg = r"Cannot convert \[\['1' '2' '3'\]\] to numeric|does not support" + msg = r"Cannot convert \[\['1' '2' '3'\]\] to numeric|does not support" df = ser.to_frame() with pytest.raises(TypeError, match=msg): df.median()