-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
TST: Fixturize tests/frame/test_missing.py #25640
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,7 @@ | |
|
||
import pandas as pd | ||
from pandas import Categorical, DataFrame, Series, Timestamp, date_range | ||
from pandas.tests.frame.common import TestData, _check_mixed_float | ||
from pandas.tests.frame.common import _check_mixed_float | ||
import pandas.util.testing as tm | ||
from pandas.util.testing import assert_frame_equal, assert_series_equal | ||
|
||
|
@@ -34,15 +34,15 @@ def _skip_if_no_pchip(): | |
pytest.skip('scipy.interpolate.pchip missing') | ||
|
||
|
||
class TestDataFrameMissingData(TestData): | ||
class TestDataFrameMissingData(): | ||
|
||
def test_dropEmptyRows(self): | ||
N = len(self.frame.index) | ||
def test_dropEmptyRows(self, float_frame): | ||
N = len(float_frame.index) | ||
mat = np.random.randn(N) | ||
mat[:5] = np.nan | ||
|
||
frame = DataFrame({'foo': mat}, index=self.frame.index) | ||
original = Series(mat, index=self.frame.index, name='foo') | ||
frame = DataFrame({'foo': mat}, index=float_frame.index) | ||
original = Series(mat, index=float_frame.index, name='foo') | ||
expected = original.dropna() | ||
inplace_frame1, inplace_frame2 = frame.copy(), frame.copy() | ||
|
||
|
@@ -58,30 +58,30 @@ def test_dropEmptyRows(self): | |
assert_series_equal(smaller_frame['foo'], expected) | ||
assert_series_equal(inplace_frame2['foo'], expected) | ||
|
||
def test_dropIncompleteRows(self): | ||
N = len(self.frame.index) | ||
def test_dropIncompleteRows(self, float_frame): | ||
N = len(float_frame.index) | ||
mat = np.random.randn(N) | ||
mat[:5] = np.nan | ||
|
||
frame = DataFrame({'foo': mat}, index=self.frame.index) | ||
frame = DataFrame({'foo': mat}, index=float_frame.index) | ||
frame['bar'] = 5 | ||
original = Series(mat, index=self.frame.index, name='foo') | ||
original = Series(mat, index=float_frame.index, name='foo') | ||
inp_frame1, inp_frame2 = frame.copy(), frame.copy() | ||
|
||
smaller_frame = frame.dropna() | ||
assert_series_equal(frame['foo'], original) | ||
inp_frame1.dropna(inplace=True) | ||
|
||
exp = Series(mat[5:], index=self.frame.index[5:], name='foo') | ||
exp = Series(mat[5:], index=float_frame.index[5:], name='foo') | ||
tm.assert_series_equal(smaller_frame['foo'], exp) | ||
tm.assert_series_equal(inp_frame1['foo'], exp) | ||
|
||
samesize_frame = frame.dropna(subset=['bar']) | ||
assert_series_equal(frame['foo'], original) | ||
assert (frame['bar'] == 5).all() | ||
inp_frame2.dropna(subset=['bar'], inplace=True) | ||
tm.assert_index_equal(samesize_frame.index, self.frame.index) | ||
tm.assert_index_equal(inp_frame2.index, self.frame.index) | ||
tm.assert_index_equal(samesize_frame.index, float_frame.index) | ||
tm.assert_index_equal(inp_frame2.index, float_frame.index) | ||
|
||
@pytest.mark.skipif(PY2, reason="pytest.raises match regex fails") | ||
def test_dropna(self): | ||
|
@@ -160,17 +160,17 @@ def test_drop_and_dropna_caching(self): | |
df2['A'].drop([1], inplace=True) | ||
assert_series_equal(df2['A'], original.drop([1])) | ||
|
||
def test_dropna_corner(self): | ||
def test_dropna_corner(self, float_frame): | ||
# bad input | ||
msg = "invalid how option: foo" | ||
with pytest.raises(ValueError, match=msg): | ||
self.frame.dropna(how='foo') | ||
float_frame.dropna(how='foo') | ||
msg = "must specify how or thresh" | ||
with pytest.raises(TypeError, match=msg): | ||
self.frame.dropna(how=None) | ||
float_frame.dropna(how=None) | ||
# non-existent column - 8303 | ||
with pytest.raises(KeyError, match=r"^\['X'\]$"): | ||
self.frame.dropna(subset=['A', 'X']) | ||
float_frame.dropna(subset=['A', 'X']) | ||
|
||
def test_dropna_multiple_axes(self): | ||
df = DataFrame([[1, np.nan, 2, 3], | ||
|
@@ -215,42 +215,46 @@ def test_dropna_tz_aware_datetime(self): | |
index=[0, 3]) | ||
assert_frame_equal(result, expected) | ||
|
||
def test_fillna(self): | ||
tf = self.tsframe | ||
def test_fillna_datetime(self, datetime_frame): | ||
tf = datetime_frame | ||
tf.loc[tf.index[:5], 'A'] = np.nan | ||
tf.loc[tf.index[-5:], 'A'] = np.nan | ||
|
||
zero_filled = self.tsframe.fillna(0) | ||
zero_filled = datetime_frame.fillna(0) | ||
assert (zero_filled.loc[zero_filled.index[:5], 'A'] == 0).all() | ||
|
||
padded = self.tsframe.fillna(method='pad') | ||
padded = datetime_frame.fillna(method='pad') | ||
assert np.isnan(padded.loc[padded.index[:5], 'A']).all() | ||
assert (padded.loc[padded.index[-5:], 'A'] == | ||
padded.loc[padded.index[-5], 'A']).all() | ||
|
||
# mixed type | ||
mf = self.mixed_frame | ||
mf.loc[mf.index[5:20], 'foo'] = np.nan | ||
mf.loc[mf.index[-10:], 'A'] = np.nan | ||
result = self.mixed_frame.fillna(value=0) | ||
result = self.mixed_frame.fillna(method='pad') | ||
|
||
msg = "Must specify a fill 'value' or 'method'" | ||
with pytest.raises(ValueError, match=msg): | ||
self.tsframe.fillna() | ||
datetime_frame.fillna() | ||
msg = "Cannot specify both 'value' and 'method'" | ||
with pytest.raises(ValueError, match=msg): | ||
self.tsframe.fillna(5, method='ffill') | ||
datetime_frame.fillna(5, method='ffill') | ||
|
||
def test_fillna_mixed_type(self, float_string_frame): | ||
|
||
mf = float_string_frame | ||
mf.loc[mf.index[5:20], 'foo'] = np.nan | ||
mf.loc[mf.index[-10:], 'A'] = np.nan | ||
result = float_string_frame.fillna(value=0) | ||
result = float_string_frame.fillna(method='pad') # noqa | ||
Review comment: Rather than using `# noqa`, we can just remove the assignment. Reply: Well, unfortunately, this split (along fixtures), requested by @jreback, is uncovering deficiencies in the existing test, in that neither of the two results is tested for anything. I don't know what the assertion should be... Reply: Sure, it is entirely possible that there are some mistakes in the existing tests. I'd suggest adding a TODO here to make a stronger assertion and opening a follow-up issue. For the time being, it is OK to remove the left-hand side and the `noqa` note; at the very least, I suppose this tests that the method doesn't raise. |
||
|
||
def test_fillna_mixed_float(self, mixed_float_frame): | ||
|
||
# mixed numeric (but no float16) | ||
mf = self.mixed_float.reindex(columns=['A', 'B', 'D']) | ||
mf = mixed_float_frame.reindex(columns=['A', 'B', 'D']) | ||
mf.loc[mf.index[-10:], 'A'] = np.nan | ||
result = mf.fillna(value=0) | ||
_check_mixed_float(result, dtype=dict(C=None)) | ||
|
||
result = mf.fillna(method='pad') | ||
_check_mixed_float(result, dtype=dict(C=None)) | ||
|
||
def test_fillna_other(self): | ||
# empty frame (GH #2778) | ||
df = DataFrame(columns=['x']) | ||
for m in ['pad', 'backfill']: | ||
|
@@ -464,19 +468,19 @@ def test_fillna_datetime_columns(self): | |
index=pd.date_range('20130110', periods=3)) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_ffill(self): | ||
self.tsframe['A'][:5] = np.nan | ||
self.tsframe['A'][-5:] = np.nan | ||
def test_ffill(self, datetime_frame): | ||
datetime_frame['A'][:5] = np.nan | ||
datetime_frame['A'][-5:] = np.nan | ||
|
||
assert_frame_equal(self.tsframe.ffill(), | ||
self.tsframe.fillna(method='ffill')) | ||
assert_frame_equal(datetime_frame.ffill(), | ||
datetime_frame.fillna(method='ffill')) | ||
|
||
def test_bfill(self): | ||
self.tsframe['A'][:5] = np.nan | ||
self.tsframe['A'][-5:] = np.nan | ||
def test_bfill(self, datetime_frame): | ||
datetime_frame['A'][:5] = np.nan | ||
datetime_frame['A'][-5:] = np.nan | ||
|
||
assert_frame_equal(self.tsframe.bfill(), | ||
self.tsframe.fillna(method='bfill')) | ||
assert_frame_equal(datetime_frame.bfill(), | ||
datetime_frame.fillna(method='bfill')) | ||
|
||
def test_frame_pad_backfill_limit(self): | ||
index = np.arange(10) | ||
|
@@ -602,24 +606,24 @@ def test_fillna_columns(self): | |
expected = df.astype(float).fillna(method='ffill', axis=1) | ||
assert_frame_equal(result, expected) | ||
|
||
def test_fillna_invalid_method(self): | ||
def test_fillna_invalid_method(self, float_frame): | ||
with pytest.raises(ValueError, match='ffil'): | ||
self.frame.fillna(method='ffil') | ||
float_frame.fillna(method='ffil') | ||
|
||
def test_fillna_invalid_value(self): | ||
def test_fillna_invalid_value(self, float_frame): | ||
# list | ||
msg = ("\"value\" parameter must be a scalar or dict, but you passed" | ||
" a \"{}\"") | ||
with pytest.raises(TypeError, match=msg.format('list')): | ||
self.frame.fillna([1, 2]) | ||
float_frame.fillna([1, 2]) | ||
# tuple | ||
with pytest.raises(TypeError, match=msg.format('tuple')): | ||
self.frame.fillna((1, 2)) | ||
float_frame.fillna((1, 2)) | ||
# frame with series | ||
msg = ("\"value\" parameter must be a scalar, dict or Series, but you" | ||
" passed a \"DataFrame\"") | ||
with pytest.raises(TypeError, match=msg): | ||
self.frame.iloc[:, 0].fillna(self.frame) | ||
float_frame.iloc[:, 0].fillna(float_frame) | ||
|
||
def test_fillna_col_reordering(self): | ||
cols = ["COL." + str(i) for i in range(5, 0, -1)] | ||
|
@@ -628,16 +632,16 @@ def test_fillna_col_reordering(self): | |
filled = df.fillna(method='ffill') | ||
assert df.columns.tolist() == filled.columns.tolist() | ||
|
||
def test_fill_corner(self): | ||
mf = self.mixed_frame | ||
def test_fill_corner(self, float_frame, float_string_frame): | ||
mf = float_string_frame | ||
mf.loc[mf.index[5:20], 'foo'] = np.nan | ||
mf.loc[mf.index[-10:], 'A'] = np.nan | ||
|
||
filled = self.mixed_frame.fillna(value=0) | ||
filled = float_string_frame.fillna(value=0) | ||
assert (filled.loc[filled.index[5:20], 'foo'] == 0).all() | ||
del self.mixed_frame['foo'] | ||
del float_string_frame['foo'] | ||
|
||
empty_float = self.frame.reindex(columns=[]) | ||
empty_float = float_frame.reindex(columns=[]) | ||
|
||
# TODO(wesm): unused? | ||
result = empty_float.fillna(value=0) # noqa | ||
|
@@ -652,7 +656,7 @@ def test_fill_value_when_combine_const(self): | |
assert_frame_equal(res, exp) | ||
|
||
|
||
class TestDataFrameInterpolate(TestData): | ||
class TestDataFrameInterpolate(): | ||
|
||
def test_interp_basic(self): | ||
df = DataFrame({'A': [1, 2, np.nan, 4], | ||
|
Uh oh!
There was an error while loading. Please reload this page.