-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
[ArrayManager] TST: get tests running for /tests/frame #39700
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
016ce1a
62fb0d5
7adb166
21f59af
a07bd95
53765e5
58bc5c7
8714c44
8a51723
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
import pytz | ||
|
||
from pandas.compat import np_version_under1p19 | ||
import pandas.util._test_decorators as td | ||
|
||
from pandas.core.dtypes.common import is_integer_dtype | ||
from pandas.core.dtypes.dtypes import ( | ||
|
@@ -165,7 +166,10 @@ def test_constructor_cast_failure(self): | |
df["foo"] = np.ones((4, 2)).tolist() | ||
|
||
# this is not ok | ||
msg = "Wrong number of items passed 2, placement implies 1" | ||
msg = ( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can you use the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, but I already did that in #39991, which I would like to get merged first, and then I will fix the conflicts with this PR to resolve this. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. thanks |
||
"Wrong number of items passed 2, placement implies 1" | ||
"|expected 1D array, got array" | ||
) | ||
with pytest.raises(ValueError, match=msg): | ||
df["test"] = np.ones((4, 2)) | ||
|
||
|
@@ -180,12 +184,15 @@ def test_constructor_dtype_copy(self): | |
new_df["col1"] = 200.0 | ||
assert orig_df["col1"][0] == 1.0 | ||
|
||
def test_constructor_dtype_nocast_view(self): | ||
def test_constructor_dtype_nocast_view_dataframe(self): | ||
df = DataFrame([[1, 2]]) | ||
should_be_view = DataFrame(df, dtype=df[0].dtype) | ||
should_be_view[0][0] = 99 | ||
assert df.values[0, 0] == 99 | ||
|
||
@td.skip_array_manager_invalid_test # TODO(ArrayManager) keep view on 2D array? | ||
def test_constructor_dtype_nocast_view_2d_array(self): | ||
df = DataFrame([[1, 2]]) | ||
should_be_view = DataFrame(df.values, dtype=df[0].dtype) | ||
should_be_view[0][0] = 97 | ||
assert df.values[0, 0] == 97 | ||
|
@@ -1937,6 +1944,7 @@ def test_constructor_frame_copy(self, float_frame): | |
assert (cop["A"] == 5).all() | ||
assert not (float_frame["A"] == 5).all() | ||
|
||
@td.skip_array_manager_invalid_test # TODO(ArrayManager) keep view on 2D array? | ||
def test_constructor_ndarray_copy(self, float_frame): | ||
df = DataFrame(float_frame.values) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,14 +26,16 @@ | |
|
||
|
||
class TestDataFrameReprInfoEtc: | ||
def test_repr_bytes_61_lines(self): | ||
def test_repr_bytes_61_lines(self, using_array_manager): | ||
# GH#12857 | ||
lets = list("ACDEFGHIJKLMNOP") | ||
slen = 50 | ||
nseqs = 1000 | ||
words = [[np.random.choice(lets) for x in range(slen)] for _ in range(nseqs)] | ||
df = DataFrame(words).astype("U1") | ||
assert (df.dtypes == object).all() | ||
# TODO(ArrayManager) astype("U1") actually gives this dtype instead of object | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm pretty sure we don't want this behavior, so should we xfail for now? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This test is not actually testing Now, I assume we should have an astype-specific test about this as well, that could be xfailed. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't directly see a test about it in There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We could move the repr calls up and do the dtype assertion at the end. |
||
if not using_array_manager: | ||
assert (df.dtypes == object).all() | ||
|
||
# smoke tests; at one point this raised with 61 but not 60 | ||
repr(df) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,8 @@ | |
import numpy as np | ||
import pytest | ||
|
||
import pandas.util._test_decorators as td | ||
|
||
import pandas as pd | ||
from pandas import ( | ||
DataFrame, | ||
|
@@ -60,12 +62,13 @@ def test_stack_mixed_level(self): | |
expected = expected[["a", "b"]] | ||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_unstack_not_consolidated(self): | ||
def test_unstack_not_consolidated(self, using_array_manager): | ||
# GH#34708 | ||
df = DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]}) | ||
df2 = df[["x"]] | ||
df2["y"] = df["y"] | ||
assert len(df2._mgr.blocks) == 2 | ||
if not using_array_manager: | ||
assert len(df2._mgr.blocks) == 2 | ||
|
||
res = df2.unstack() | ||
expected = df.unstack() | ||
|
@@ -747,7 +750,8 @@ def test_unstack_multi_level_rows_and_cols(self): | |
expected = df.unstack(["i3"]).unstack(["i2"]) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_unstack_nan_index(self): # GH7466 | ||
def test_unstack_nan_index1(self): | ||
# GH7466 | ||
def cast(val): | ||
val_str = "" if val != val else val | ||
return f"{val_str:1}" | ||
|
@@ -833,6 +837,7 @@ def verify(df): | |
for col in ["4th", "5th"]: | ||
verify(udf[col]) | ||
|
||
def test_unstack_nan_index2(self): | ||
# GH7403 | ||
df = DataFrame({"A": list("aaaabbbb"), "B": range(8), "C": range(8)}) | ||
df.iloc[3, 1] = np.NaN | ||
|
@@ -875,6 +880,7 @@ def verify(df): | |
right = DataFrame(vals, columns=cols, index=idx) | ||
tm.assert_frame_equal(left, right) | ||
|
||
def test_unstack_nan_index3(self, using_array_manager): | ||
# GH7401 | ||
df = DataFrame( | ||
{ | ||
|
@@ -896,8 +902,13 @@ def verify(df): | |
) | ||
|
||
right = DataFrame(vals, columns=cols, index=idx) | ||
if using_array_manager: | ||
# INFO(ArrayManager) with ArrayManager preserve dtype where possible | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. is There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, it was requested by Jeff on one of the previous PRs. It's an explicit comment about behaviour that changed with ArrayManager, but which is not a TODO (since it's not wrong behaviour that still needs to be fixed). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. thanks |
||
cols = right.columns[[1, 2, 3, 5]] | ||
right[cols] = right[cols].astype("int64") | ||
tm.assert_frame_equal(left, right) | ||
|
||
def test_unstack_nan_index4(self): | ||
# GH4862 | ||
vals = [ | ||
["Hg", np.nan, np.nan, 680585148], | ||
|
@@ -938,6 +949,8 @@ def verify(df): | |
left = df.loc[17264:].copy().set_index(["s_id", "dosage", "agent"]) | ||
tm.assert_frame_equal(left.unstack(), right) | ||
|
||
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) MultiIndex bug | ||
def test_unstack_nan_index5(self): | ||
# GH9497 - multiple unstack with nulls | ||
df = DataFrame( | ||
{ | ||
|
@@ -1453,6 +1466,7 @@ def test_stack_mixed_dtype(self, multiindex_dataframe_random_data): | |
assert result.name is None | ||
assert stacked["bar"].dtype == np.float_ | ||
|
||
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby | ||
def test_unstack_bug(self): | ||
df = DataFrame( | ||
{ | ||
|
@@ -1887,7 +1901,7 @@ def test_unstack_group_index_overflow(self): | |
result = s.unstack(4) | ||
assert result.shape == (500, 2) | ||
|
||
def test_unstack_with_missing_int_cast_to_float(self): | ||
def test_unstack_with_missing_int_cast_to_float(self, using_array_manager): | ||
# https://github.com/pandas-dev/pandas/issues/37115 | ||
df = DataFrame( | ||
{ | ||
|
@@ -1899,7 +1913,8 @@ def test_unstack_with_missing_int_cast_to_float(self): | |
|
||
# add another int column to get 2 blocks | ||
df["is_"] = 1 | ||
assert len(df._mgr.blocks) == 2 | ||
if not using_array_manager: | ||
assert len(df._mgr.blocks) == 2 | ||
|
||
result = df.unstack("b") | ||
result[("is_", "ca")] = result[("is_", "ca")].fillna(0) | ||
|
@@ -1912,6 +1927,10 @@ def test_unstack_with_missing_int_cast_to_float(self): | |
names=[None, "b"], | ||
), | ||
) | ||
if using_array_manager: | ||
# INFO(ArrayManager) with ArrayManager preserve dtype where possible | ||
expected[("v", "cb")] = expected[("v", "cb")].astype("int64") | ||
expected[("is_", "cb")] = expected[("is_", "cb")].astype("int64") | ||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_unstack_with_level_has_nan(self): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe make a helper function for this type of operation/check (doing it in a follow-on PR is OK).