Skip to content

CLN: test_nanops/take.py #48999

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 10, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
257 changes: 129 additions & 128 deletions pandas/tests/test_nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,18 +299,18 @@ def test_nanmean(self, skipna):
nanops.nanmean, np.mean, skipna, allow_obj=False, allow_date=False
)

def test_nanmean_overflow(self):
@pytest.mark.parametrize("val", [2**55, -(2**55), 20150515061816532])
def test_nanmean_overflow(self, val):
# GH 10155
# In the previous implementation, mean could overflow for int dtypes; it
# is now consistent with numpy

for a in [2**55, -(2**55), 20150515061816532]:
s = Series(a, index=range(500), dtype=np.int64)
result = s.mean()
np_result = s.values.mean()
assert result == a
assert result == np_result
assert result.dtype == np.float64
s = Series(val, index=range(500), dtype=np.int64)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick: rename `s` to `ser`.

result = s.mean()
np_result = s.values.mean()
assert result == val
assert result == np_result
assert result.dtype == np.float64

@pytest.mark.parametrize(
"dtype",
Expand Down Expand Up @@ -623,7 +623,19 @@ def test_nancov(self):
targ1 = np.cov(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
self.check_nancorr_nancov_1d(nanops.nancov, targ0, targ1)

def check_nancomp(self, checkfun, targ0):
@pytest.mark.parametrize(
"op,nanop",
[
(operator.eq, nanops.naneq),
(operator.ne, nanops.nanne),
(operator.gt, nanops.nangt),
(operator.ge, nanops.nange),
(operator.lt, nanops.nanlt),
(operator.le, nanops.nanle),
],
)
def test_nan_comparison(self, op, nanop):
targ0 = op(self.arr_float, self.arr_float1)
arr_float = self.arr_float
arr_float1 = self.arr_float1
arr_nan = self.arr_nan
Expand All @@ -633,18 +645,18 @@ def check_nancomp(self, checkfun, targ0):
arr_nan_float1 = self.arr_nan_float1

while targ0.ndim:
res0 = checkfun(arr_float, arr_float1)
res0 = nanop(arr_float, arr_float1)
tm.assert_almost_equal(targ0, res0)

if targ0.ndim > 1:
targ1 = np.vstack([targ0, arr_nan])
else:
targ1 = np.hstack([targ0, arr_nan])
res1 = checkfun(arr_float_nan, arr_float1_nan)
res1 = nanop(arr_float_nan, arr_float1_nan)
tm.assert_numpy_array_equal(targ1, res1, check_dtype=False)

targ2 = arr_nan_nan
res2 = checkfun(arr_float_nan, arr_nan_float1)
res2 = nanop(arr_float_nan, arr_nan_float1)
tm.assert_numpy_array_equal(targ2, res2, check_dtype=False)

# Lower dimension for next step in the loop
Expand All @@ -658,48 +670,38 @@ def check_nancomp(self, checkfun, targ0):
targ0 = np.take(targ0, 0, axis=-1)

@pytest.mark.parametrize(
"op,nanop",
"arr, correct",
[
(operator.eq, nanops.naneq),
(operator.ne, nanops.nanne),
(operator.gt, nanops.nangt),
(operator.ge, nanops.nange),
(operator.lt, nanops.nanlt),
(operator.le, nanops.nanle),
("arr_complex", False),
("arr_int", False),
("arr_bool", False),
("arr_str", False),
("arr_utf", False),
("arr_complex", False),
("arr_complex_nan", False),
("arr_nan_nanj", False),
("arr_nan_infj", True),
("arr_complex_nan_infj", True),
],
)
def test_nan_comparison(self, op, nanop):
targ0 = op(self.arr_float, self.arr_float1)
self.check_nancomp(nanop, targ0)

def check_bool(self, func, value, correct):
while getattr(value, "ndim", True):
res0 = func(value)
def test__has_infs_non_float(self, arr, correct):
val = getattr(self, arr)
while getattr(val, "ndim", True):
res0 = nanops._has_infs(val)
if correct:
assert res0
else:
assert not res0

if not hasattr(value, "ndim"):
if not hasattr(val, "ndim"):
break

# Reduce dimension for next step in the loop
value = np.take(value, 0, axis=-1)
val = np.take(val, 0, axis=-1)

def test__has_infs(self):
pairs = [
("arr_complex", False),
("arr_int", False),
("arr_bool", False),
("arr_str", False),
("arr_utf", False),
("arr_complex", False),
("arr_complex_nan", False),
("arr_nan_nanj", False),
("arr_nan_infj", True),
("arr_complex_nan_infj", True),
]
pairs_float = [
@pytest.mark.parametrize(
"arr, correct",
[
("arr_float", False),
("arr_nan", False),
("arr_float_nan", False),
Expand All @@ -709,17 +711,25 @@ def test__has_infs(self):
("arr_nan_inf", True),
("arr_float_nan_inf", True),
("arr_nan_nan_inf", True),
]
],
)
@pytest.mark.parametrize("astype", [None, "f4", "f2"])
def test__has_infs_floats(self, arr, correct, astype):
val = getattr(self, arr)
if astype is not None:
val = val.astype(astype)
while getattr(val, "ndim", True):
res0 = nanops._has_infs(val)
if correct:
assert res0
else:
assert not res0

for arr, correct in pairs:
val = getattr(self, arr)
self.check_bool(nanops._has_infs, val, correct)
if not hasattr(val, "ndim"):
break

for arr, correct in pairs_float:
val = getattr(self, arr)
self.check_bool(nanops._has_infs, val, correct)
self.check_bool(nanops._has_infs, val.astype("f4"), correct)
self.check_bool(nanops._has_infs, val.astype("f2"), correct)
# Reduce dimension for next step in the loop
val = np.take(val, 0, axis=-1)

def test__bn_ok_dtype(self):
assert nanops._bn_ok_dtype(self.arr_float.dtype, "test")
Expand Down Expand Up @@ -784,46 +794,47 @@ def test_non_convertable_values(self):
class TestNanvarFixedValues:

# xref GH10242
# Samples from a normal distribution.
@pytest.fixture
def variance(self):
return 3.0

def setup_method(self):
# Samples from a normal distribution.
self.variance = variance = 3.0
self.samples = self.prng.normal(scale=variance**0.5, size=100000)
@pytest.fixture
def samples(self, variance):
return self.prng.normal(scale=variance**0.5, size=100000)

def test_nanvar_all_finite(self):
samples = self.samples
def test_nanvar_all_finite(self, samples, variance):
actual_variance = nanops.nanvar(samples)
tm.assert_almost_equal(actual_variance, self.variance, rtol=1e-2)
tm.assert_almost_equal(actual_variance, variance, rtol=1e-2)

def test_nanvar_nans(self):
samples = np.nan * np.ones(2 * self.samples.shape[0])
samples[::2] = self.samples
def test_nanvar_nans(self, samples, variance):
samples_test = np.nan * np.ones(2 * samples.shape[0])
samples_test[::2] = samples

actual_variance = nanops.nanvar(samples, skipna=True)
tm.assert_almost_equal(actual_variance, self.variance, rtol=1e-2)
actual_variance = nanops.nanvar(samples_test, skipna=True)
tm.assert_almost_equal(actual_variance, variance, rtol=1e-2)

actual_variance = nanops.nanvar(samples, skipna=False)
actual_variance = nanops.nanvar(samples_test, skipna=False)
tm.assert_almost_equal(actual_variance, np.nan, rtol=1e-2)

def test_nanstd_nans(self):
samples = np.nan * np.ones(2 * self.samples.shape[0])
samples[::2] = self.samples
def test_nanstd_nans(self, samples, variance):
samples_test = np.nan * np.ones(2 * samples.shape[0])
samples_test[::2] = samples

actual_std = nanops.nanstd(samples, skipna=True)
tm.assert_almost_equal(actual_std, self.variance**0.5, rtol=1e-2)
actual_std = nanops.nanstd(samples_test, skipna=True)
tm.assert_almost_equal(actual_std, variance**0.5, rtol=1e-2)

actual_std = nanops.nanvar(samples, skipna=False)
actual_std = nanops.nanvar(samples_test, skipna=False)
tm.assert_almost_equal(actual_std, np.nan, rtol=1e-2)

def test_nanvar_axis(self):
def test_nanvar_axis(self, samples, variance):
# Generate some sample data.
samples_norm = self.samples
samples_unif = self.prng.uniform(size=samples_norm.shape[0])
samples = np.vstack([samples_norm, samples_unif])
samples_unif = self.prng.uniform(size=samples.shape[0])
samples = np.vstack([samples, samples_unif])

actual_variance = nanops.nanvar(samples, axis=1)
tm.assert_almost_equal(
actual_variance, np.array([self.variance, 1.0 / 12]), rtol=1e-2
actual_variance, np.array([variance, 1.0 / 12]), rtol=1e-2
)

def test_nanvar_ddof(self):
Expand Down Expand Up @@ -901,18 +912,21 @@ def prng(self):
class TestNanskewFixedValues:

# xref GH 11974
# Test data + skewness value (computed with scipy.stats.skew)
@pytest.fixture
def samples(self):
return np.sin(np.linspace(0, 1, 200))

def setup_method(self):
# Test data + skewness value (computed with scipy.stats.skew)
self.samples = np.sin(np.linspace(0, 1, 200))
self.actual_skew = -0.1875895205961754
@pytest.fixture
def actual_skew(self):
return -0.1875895205961754

def test_constant_series(self):
@pytest.mark.parametrize("val", [3075.2, 3075.3, 3075.5])
def test_constant_series(self, val):
# xref GH 11974
for val in [3075.2, 3075.3, 3075.5]:
data = val * np.ones(300)
skew = nanops.nanskew(data)
assert skew == 0.0
data = val * np.ones(300)
skew = nanops.nanskew(data)
assert skew == 0.0

def test_all_finite(self):
alpha, beta = 0.3, 0.1
Expand All @@ -923,24 +937,24 @@ def test_all_finite(self):
right_tailed = self.prng.beta(alpha, beta, size=100)
assert nanops.nanskew(right_tailed) > 0

def test_ground_truth(self):
skew = nanops.nanskew(self.samples)
tm.assert_almost_equal(skew, self.actual_skew)
def test_ground_truth(self, samples, actual_skew):
skew = nanops.nanskew(samples)
tm.assert_almost_equal(skew, actual_skew)

def test_axis(self):
samples = np.vstack([self.samples, np.nan * np.ones(len(self.samples))])
def test_axis(self, samples, actual_skew):
samples = np.vstack([samples, np.nan * np.ones(len(samples))])
skew = nanops.nanskew(samples, axis=1)
tm.assert_almost_equal(skew, np.array([self.actual_skew, np.nan]))
tm.assert_almost_equal(skew, np.array([actual_skew, np.nan]))

def test_nans(self):
samples = np.hstack([self.samples, np.nan])
def test_nans(self, samples):
samples = np.hstack([samples, np.nan])
skew = nanops.nanskew(samples, skipna=False)
assert np.isnan(skew)

def test_nans_skipna(self):
samples = np.hstack([self.samples, np.nan])
def test_nans_skipna(self, samples, actual_skew):
samples = np.hstack([samples, np.nan])
skew = nanops.nanskew(samples, skipna=True)
tm.assert_almost_equal(skew, self.actual_skew)
tm.assert_almost_equal(skew, actual_skew)

@property
def prng(self):
Expand All @@ -950,11 +964,14 @@ def prng(self):
class TestNankurtFixedValues:

# xref GH 11974
# Test data + kurtosis value (computed with scipy.stats.kurtosis)
@pytest.fixture
def samples(self):
return np.sin(np.linspace(0, 1, 200))

def setup_method(self):
# Test data + kurtosis value (computed with scipy.stats.kurtosis)
self.samples = np.sin(np.linspace(0, 1, 200))
self.actual_kurt = -1.2058303433799713
@pytest.fixture
def actual_kurt(self):
return -1.2058303433799713

@pytest.mark.parametrize("val", [3075.2, 3075.3, 3075.5])
def test_constant_series(self, val):
Expand All @@ -972,24 +989,24 @@ def test_all_finite(self):
right_tailed = self.prng.beta(alpha, beta, size=100)
assert nanops.nankurt(right_tailed) > 0

def test_ground_truth(self):
kurt = nanops.nankurt(self.samples)
tm.assert_almost_equal(kurt, self.actual_kurt)
def test_ground_truth(self, samples, actual_kurt):
kurt = nanops.nankurt(samples)
tm.assert_almost_equal(kurt, actual_kurt)

def test_axis(self):
samples = np.vstack([self.samples, np.nan * np.ones(len(self.samples))])
def test_axis(self, samples, actual_kurt):
samples = np.vstack([samples, np.nan * np.ones(len(samples))])
kurt = nanops.nankurt(samples, axis=1)
tm.assert_almost_equal(kurt, np.array([self.actual_kurt, np.nan]))
tm.assert_almost_equal(kurt, np.array([actual_kurt, np.nan]))

def test_nans(self):
samples = np.hstack([self.samples, np.nan])
def test_nans(self, samples):
samples = np.hstack([samples, np.nan])
kurt = nanops.nankurt(samples, skipna=False)
assert np.isnan(kurt)

def test_nans_skipna(self):
samples = np.hstack([self.samples, np.nan])
def test_nans_skipna(self, samples, actual_kurt):
samples = np.hstack([samples, np.nan])
kurt = nanops.nankurt(samples, skipna=True)
tm.assert_almost_equal(kurt, self.actual_kurt)
tm.assert_almost_equal(kurt, actual_kurt)

@property
def prng(self):
Expand Down Expand Up @@ -1123,22 +1140,6 @@ def test_check_below_min_count__large_shape(min_count, expected_result):


@pytest.mark.parametrize("func", ["nanmean", "nansum"])
@pytest.mark.parametrize(
"dtype",
[
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.int8,
np.int16,
np.int32,
np.int64,
np.float16,
np.float32,
np.float64,
],
)
def test_check_bottleneck_disallow(dtype, func):
def test_check_bottleneck_disallow(any_real_numpy_dtype, func):
# GH 42878 bottleneck sometimes produces unreliable results for mean and sum
assert not nanops._bn_ok_dtype(dtype, func)
assert not nanops._bn_ok_dtype(np.dtype(any_real_numpy_dtype).type, func)
Loading