Skip to content

support axis=None for nanmedian ( issue #7352 ) #7440

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 12, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/v0.14.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ Bug Fixes
(:issue:`7353`)
- Bug in several ``nanops`` functions when ``axis==0`` for
1-dimensional ``nan`` arrays (:issue:`7354`)
- Bug where ``nanops.nanmedian`` doesn't work when ``axis==None``
(:issue:`7352`)



Expand Down
3 changes: 3 additions & 0 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,9 @@ def get_median(x):
if values.dtype != np.float64:
values = values.astype('f8')

if axis is None:
values = values.ravel()

notempty = values.size

# an array from a frame
Expand Down
76 changes: 49 additions & 27 deletions pandas/tests/test_nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

class TestnanopsDataFrame(tm.TestCase):
def setUp(self):
np.random.seed(11235)

self.arr_shape = (11, 7, 5)

self.arr_float = np.random.randn(*self.arr_shape)
Expand Down Expand Up @@ -118,11 +120,38 @@ def check_results(self, targ, res, axis):
res = getattr(res, 'values', res)
if axis != 0 and hasattr(targ, 'shape') and targ.ndim:
res = np.split(res, [targ.shape[0]], axis=0)[0]
tm.assert_almost_equal(targ, res)
try:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can just pass check_less_precise=True if it's a complex number (or explicitly astype before the comparison).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried that, it doesn't fix the problem. It still raises an AssertionError even if they only differ in their 16th digit.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

>>> a=np.array([1+.1111111111111111*1j])
>>> b=np.array([1+.1111111111111112*1j])
>>> tm.assert_almost_equal(a, b, check_less_precise=True)
AssertionError: (1+0.1111111111111111j) != (1+0.1111111111111112j)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Compare the real and imag parts separately and then it works correctly. A side issue is to patch tm.assert_almost_equal to deal with complex numbers by this method.

In [1]: a=np.array([1+.1111111111111111*1j])

In [2]: b=np.array([1+.1111111111111112*1j])

In [4]: tm.assert_almost_equal(a.real, b.real)
Out[4]: True

In [5]: tm.assert_almost_equal(a.imag, b.imag)
Out[5]: True

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

tm.assert_almost_equal(targ, res)
except:
# There are sometimes rounding errors with
# complex and object dtypes.
# If it isn't one of those, re-raise the error.
if not hasattr(res, 'dtype') or res.dtype.kind not in ['c', 'O']:
raise
# convert object dtypes to something that can be split into
# real and imaginary parts
if res.dtype.kind == 'O':
if targ.dtype.kind != 'O':
res = res.astype(targ.dtype)
else:
try:
res = res.astype('c16')
except:
res = res.astype('f8')
try:
targ = targ.astype('c16')
except:
targ = targ.astype('f8')
# there should never be a case where numpy returns an object
# but nanops doesn't, so make that an exception
elif targ.dtype.kind == 'O':
raise
tm.assert_almost_equal(targ.real, res.real)
tm.assert_almost_equal(targ.imag, res.imag)

def check_fun_data(self, testfunc, targfunc,
testarval, targarval, targarnanval, **kwargs):
for axis in list(range(targarval.ndim)):
for axis in list(range(targarval.ndim))+[None]:
for skipna in [False, True]:
targartempval = targarval if skipna else targarnanval
try:
Expand Down Expand Up @@ -215,6 +244,12 @@ def check_funs(self, testfunc, targfunc,

if allow_obj:
self.arr_obj = np.vstack(objs)
# some nanops handle object dtypes better than their numpy
# counterparts, so the numpy functions need to be given something
# else
if allow_obj == 'convert':
targfunc = partial(self._badobj_wrap,
func=targfunc, allow_complex=allow_complex)
self.check_fun(testfunc, targfunc, 'arr_obj', **kwargs)

def check_funs_ddof(self, testfunc, targfunc,
Expand All @@ -229,6 +264,14 @@ def check_funs_ddof(self, testfunc, targfunc,
except BaseException as exc:
exc.args += ('ddof %s' % ddof,)

def _badobj_wrap(self, value, func, allow_complex=True, **kwargs):
# Wrapper for a target (reference) function: when ``value`` has object
# dtype, cast it to a numeric dtype before calling ``func`` with it.
# ``check_funs`` binds ``func`` and ``allow_complex`` via ``partial`` when
# it is called with ``allow_obj == 'convert'``.
# Non-object input is forwarded to ``func`` unchanged.
if value.dtype.kind == 'O':
if allow_complex:
# 'c16' is NumPy's 16-byte complex dtype (complex128)
value = value.astype('c16')
else:
# 'f8' is NumPy's 8-byte float dtype (float64)
value = value.astype('f8')
# remaining keyword arguments are passed straight through to ``func``
return func(value, **kwargs)

def test_nanany(self):
self.check_funs(nanops.nanany, np.any,
allow_all_nan=False, allow_str=False, allow_date=False)
Expand All @@ -241,36 +284,15 @@ def test_nansum(self):
self.check_funs(nanops.nansum, np.sum,
allow_str=False, allow_date=False)

def _nanmean_wrap(self, value, *args, **kwargs):
dtype = value.dtype
res = nanops.nanmean(value, *args, **kwargs)
if dtype.kind == 'O':
res = np.round(res, decimals=13)
return res

def _mean_wrap(self, value, *args, **kwargs):
dtype = value.dtype
if dtype.kind == 'O':
value = value.astype('c16')
res = np.mean(value, *args, **kwargs)
if dtype.kind == 'O':
res = np.round(res, decimals=13)
return res

def test_nanmean(self):
self.check_funs(self._nanmean_wrap, self._mean_wrap,
self.check_funs(nanops.nanmean, np.mean,
allow_complex=False, allow_obj=False,
allow_str=False, allow_date=False)

def _median_wrap(self, value, *args, **kwargs):
if value.dtype.kind == 'O':
value = value.astype('c16')
res = np.median(value, *args, **kwargs)
return res

def test_nanmedian(self):
self.check_funs(nanops.nanmedian, self._median_wrap,
allow_complex=False, allow_str=False, allow_date=False)
self.check_funs(nanops.nanmedian, np.median,
allow_complex=False, allow_str=False, allow_date=False,
allow_obj='convert')

def test_nanvar(self):
self.check_funs_ddof(nanops.nanvar, np.var,
Expand Down