Skip to content

[ArrayManager] TST: get tests running for /tests/frame #39700

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 5, 2021
Merged
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ jobs:
run: |
source activate pandas-dev
pytest pandas/tests/frame/methods --array-manager
pytest pandas/tests/frame/test_reductions.py --array-manager
pytest pandas/tests/frame/test_* --array-manager
pytest pandas/tests/reductions/ --array-manager
pytest pandas/tests/generic/test_generic.py --array-manager
pytest pandas/tests/arithmetic/ --array-manager
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,7 +816,13 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False

value = extract_array(value, extract_numpy=True)
if value.ndim == 2:
value = value[0, :]
if value.shape[0] == 1:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe make a helper function for this type of operation/check (a follow-on PR is OK).

value = value[0, :]
else:
raise ValueError(
f"expected 1D array, got array with shape {value.shape}"
)

# TODO self.arrays can be empty
# assert len(value) == len(self.arrays[0])

Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import pytest
import pytz

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -686,6 +688,7 @@ def test_df_add_2d_array_collike_broadcasts(self):
result = collike + df
tm.assert_frame_equal(result, expected)

@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) decide on dtypes
def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators):
# GH#23000
opname = all_arithmetic_operators
Expand All @@ -707,6 +710,7 @@ def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators):
result = getattr(df, opname)(rowlike)
tm.assert_frame_equal(result, expected)

@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) decide on dtypes
def test_df_arith_2d_array_collike_broadcasts(self, all_arithmetic_operators):
# GH#23000
opname = all_arithmetic_operators
Expand Down Expand Up @@ -1351,7 +1355,7 @@ def test_strings_to_numbers_comparisons_raises(self, compare_operators_no_eq_ne)

def test_comparison_protected_from_errstate(self):
missing_df = tm.makeDataFrame()
missing_df.iloc[0]["A"] = np.nan
missing_df.loc[missing_df.index[0], "A"] = np.nan
with np.errstate(invalid="ignore"):
expected = missing_df.values < 0
with np.errstate(invalid="raise"):
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pytest

from pandas.errors import PerformanceWarning
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
Expand All @@ -30,6 +31,11 @@
# structure


# TODO(ArrayManager) check which of those tests need to be rewritten to test the
# equivalent for ArrayManager
pytestmark = td.skip_array_manager_invalid_test


class TestDataFrameBlockInternals:
def test_setitem_invalidates_datetime_index_freq(self):
# GH#24096 altering a datetime64tz column inplace invalidates the
Expand Down
12 changes: 10 additions & 2 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import pytz

from pandas.compat import np_version_under1p19
import pandas.util._test_decorators as td

from pandas.core.dtypes.common import is_integer_dtype
from pandas.core.dtypes.dtypes import (
Expand Down Expand Up @@ -165,7 +166,10 @@ def test_constructor_cast_failure(self):
df["foo"] = np.ones((4, 2)).tolist()

# this is not ok
msg = "Wrong number of items passed 2, placement implies 1"
msg = (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you use the "|".join pattern?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but I already did that in #39991, which I would like to get merged first; I will then fix the conflicts with this PR to resolve this.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks

"Wrong number of items passed 2, placement implies 1"
"|expected 1D array, got array"
)
with pytest.raises(ValueError, match=msg):
df["test"] = np.ones((4, 2))

Expand All @@ -180,12 +184,15 @@ def test_constructor_dtype_copy(self):
new_df["col1"] = 200.0
assert orig_df["col1"][0] == 1.0

def test_constructor_dtype_nocast_view(self):
def test_constructor_dtype_nocast_view_dataframe(self):
df = DataFrame([[1, 2]])
should_be_view = DataFrame(df, dtype=df[0].dtype)
should_be_view[0][0] = 99
assert df.values[0, 0] == 99

@td.skip_array_manager_invalid_test # TODO(ArrayManager) keep view on 2D array?
def test_constructor_dtype_nocast_view_2d_array(self):
df = DataFrame([[1, 2]])
should_be_view = DataFrame(df.values, dtype=df[0].dtype)
should_be_view[0][0] = 97
assert df.values[0, 0] == 97
Expand Down Expand Up @@ -1937,6 +1944,7 @@ def test_constructor_frame_copy(self, float_frame):
assert (cop["A"] == 5).all()
assert not (float_frame["A"] == 5).all()

@td.skip_array_manager_invalid_test # TODO(ArrayManager) keep view on 2D array?
def test_constructor_ndarray_copy(self, float_frame):
df = DataFrame(float_frame.values)

Expand Down
7 changes: 4 additions & 3 deletions pandas/tests/frame/test_nonunique_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ def test_multi_dtype2(self):
expected = DataFrame([[1, 2, "foo", "bar"]], columns=["a", "a.1", "a.2", "a.3"])
tm.assert_frame_equal(df, expected)

def test_dups_across_blocks(self):
def test_dups_across_blocks(self, using_array_manager):
# dups across blocks
df_float = DataFrame(np.random.randn(10, 3), dtype="float64")
df_int = DataFrame(np.random.randn(10, 3), dtype="int64")
Expand All @@ -302,8 +302,9 @@ def test_dups_across_blocks(self):
)
df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1)

assert len(df._mgr.blknos) == len(df.columns)
assert len(df._mgr.blklocs) == len(df.columns)
if not using_array_manager:
assert len(df._mgr.blknos) == len(df.columns)
assert len(df._mgr.blklocs) == len(df.columns)

# testing iloc
for i in range(len(df.columns)):
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/frame/test_repr_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,16 @@


class TestDataFrameReprInfoEtc:
def test_repr_bytes_61_lines(self):
def test_repr_bytes_61_lines(self, using_array_manager):
# GH#12857
lets = list("ACDEFGHIJKLMNOP")
slen = 50
nseqs = 1000
words = [[np.random.choice(lets) for x in range(slen)] for _ in range(nseqs)]
df = DataFrame(words).astype("U1")
assert (df.dtypes == object).all()
# TODO(Arraymanager) astype("U1") actually gives this dtype instead of object
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm pretty sure we don't want this behavior, so should we xfail for now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is not actually testing astype, but rather repr, so I would prefer to run the rest of the test (which actually passes) without having this line error.

Now, I assume we should have an astype-specific test about this as well, that could be xfailed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't directly see a test about it in frame/methods/test_astype.py, so I will add a test for that there. It's not very clear, though, what the expected behaviour should be.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could move the repr calls up and do the dtype assertion at the end.

if not using_array_manager:
assert (df.dtypes == object).all()

# smoke tests; at one point this raised with 61 but not 60
repr(df)
Expand Down
29 changes: 24 additions & 5 deletions pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -60,12 +62,13 @@ def test_stack_mixed_level(self):
expected = expected[["a", "b"]]
tm.assert_frame_equal(result, expected)

def test_unstack_not_consolidated(self):
def test_unstack_not_consolidated(self, using_array_manager):
# Gh#34708
df = DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]})
df2 = df[["x"]]
df2["y"] = df["y"]
assert len(df2._mgr.blocks) == 2
if not using_array_manager:
assert len(df2._mgr.blocks) == 2

res = df2.unstack()
expected = df.unstack()
Expand Down Expand Up @@ -747,7 +750,8 @@ def test_unstack_multi_level_rows_and_cols(self):
expected = df.unstack(["i3"]).unstack(["i2"])
tm.assert_frame_equal(result, expected)

def test_unstack_nan_index(self): # GH7466
def test_unstack_nan_index1(self):
# GH7466
def cast(val):
val_str = "" if val != val else val
return f"{val_str:1}"
Expand Down Expand Up @@ -833,6 +837,7 @@ def verify(df):
for col in ["4th", "5th"]:
verify(udf[col])

def test_unstack_nan_index2(self):
# GH7403
df = DataFrame({"A": list("aaaabbbb"), "B": range(8), "C": range(8)})
df.iloc[3, 1] = np.NaN
Expand Down Expand Up @@ -875,6 +880,7 @@ def verify(df):
right = DataFrame(vals, columns=cols, index=idx)
tm.assert_frame_equal(left, right)

def test_unstack_nan_index3(self, using_array_manager):
# GH7401
df = DataFrame(
{
Expand All @@ -896,8 +902,13 @@ def verify(df):
)

right = DataFrame(vals, columns=cols, index=idx)
if using_array_manager:
# INFO(ArrayManager) with ArrayManager preserve dtype where possible
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is INFO(ArrayManager) a pattern I should know?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it was requested by Jeff on one of the previous PRs. It's an explicit comment about behaviour that changed with ArrayManager, but which is not a TODO (since it's not wrong behaviour that still needs to be fixed)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks

cols = right.columns[[1, 2, 3, 5]]
right[cols] = right[cols].astype("int64")
tm.assert_frame_equal(left, right)

def test_unstack_nan_index4(self):
# GH4862
vals = [
["Hg", np.nan, np.nan, 680585148],
Expand Down Expand Up @@ -938,6 +949,8 @@ def verify(df):
left = df.loc[17264:].copy().set_index(["s_id", "dosage", "agent"])
tm.assert_frame_equal(left.unstack(), right)

@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) MultiIndex bug
def test_unstack_nan_index5(self):
# GH9497 - multiple unstack with nulls
df = DataFrame(
{
Expand Down Expand Up @@ -1453,6 +1466,7 @@ def test_stack_mixed_dtype(self, multiindex_dataframe_random_data):
assert result.name is None
assert stacked["bar"].dtype == np.float_

@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby
def test_unstack_bug(self):
df = DataFrame(
{
Expand Down Expand Up @@ -1887,7 +1901,7 @@ def test_unstack_group_index_overflow(self):
result = s.unstack(4)
assert result.shape == (500, 2)

def test_unstack_with_missing_int_cast_to_float(self):
def test_unstack_with_missing_int_cast_to_float(self, using_array_manager):
# https://github.com/pandas-dev/pandas/issues/37115
df = DataFrame(
{
Expand All @@ -1899,7 +1913,8 @@ def test_unstack_with_missing_int_cast_to_float(self):

# add another int column to get 2 blocks
df["is_"] = 1
assert len(df._mgr.blocks) == 2
if not using_array_manager:
assert len(df._mgr.blocks) == 2

result = df.unstack("b")
result[("is_", "ca")] = result[("is_", "ca")].fillna(0)
Expand All @@ -1912,6 +1927,10 @@ def test_unstack_with_missing_int_cast_to_float(self):
names=[None, "b"],
),
)
if using_array_manager:
# INFO(ArrayManager) with ArrayManager preserve dtype where possible
expected[("v", "cb")] = expected[("v", "cb")].astype("int64")
expected[("is_", "cb")] = expected[("is_", "cb")].astype("int64")
tm.assert_frame_equal(result, expected)

def test_unstack_with_level_has_nan(self):
Expand Down