-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
standardize signature for Index reductions, implement nanmean for datetime64 dtypes #24293
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
f38ffe1
04cf1f7
3efab79
a652439
ce760d3
2380af6
4157f0b
50642ae
0baedf3
6e0e69f
e2c301b
fce67ac
4b4979f
82b8cdf
ffe6ada
7f7693f
07c3102
6c93410
4620c56
4777b75
aa4028a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -920,10 +920,16 @@ def _ndarray_values(self): | |
def empty(self): | ||
return not self.size | ||
|
||
def max(self): | ||
def max(self, axis=None, skipna=True): | ||
""" | ||
Return the maximum value of the Index. | ||
|
||
Parameters | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also we have similar things in pandas/core/base, these should change as well. |
||
---------- | ||
axis : {None} | ||
Dummy argument for consistency with Series | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not sure we use this terminology here
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think @jbrockmendel is listing the set of possible values, which we do use. In this case, that set is just the single value I think
is clearer. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @TomAugspurger verbiage would be good |
||
skipna : bool, default True | ||
|
||
Returns | ||
------- | ||
scalar | ||
|
@@ -951,22 +957,36 @@ def max(self): | |
>>> idx.max() | ||
('b', 2) | ||
""" | ||
nv.validate_minmax_axis(axis) | ||
return nanops.nanmax(self.values) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why isn't skipna passed? Is that a followup item? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had considered this a follow-up item, but am now leaning towards fixing these now. Good catch. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This turns out to be more involved than expected, requires some changes in Series._reduce and IndexOpsMixin._reduce. Neither of which is all that invasive, but changing them entails changing/fixing/testing behavior for other reductions I hadn't planned on doing in this PR. Currently going through to get that all done here. I'm also OK with doing this in two phases so as to take things out of #24024 sooner rather than later. |
||
|
||
def argmax(self, axis=None): | ||
def argmax(self, axis=None, skipna=True): | ||
""" | ||
Return a ndarray of the maximum argument indexer. | ||
|
||
Parameters | ||
---------- | ||
axis : {None} | ||
Dummy argument for consistency with Series | ||
skipna : bool, default True | ||
|
||
See Also | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should be consisten about the See Also, e.g. make sure in all, and add a refernce to the Series.min function as well (as appropriate) |
||
-------- | ||
numpy.ndarray.argmax | ||
""" | ||
nv.validate_minmax_axis(axis) | ||
return nanops.nanargmax(self.values) | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def min(self): | ||
def min(self, axis=None, skipna=True): | ||
""" | ||
Return the minimum value of the Index. | ||
|
||
Parameters | ||
---------- | ||
axis : {None} | ||
Dummy argument for consistency with Series | ||
skipna : bool, default True | ||
|
||
Returns | ||
------- | ||
scalar | ||
|
@@ -994,16 +1014,24 @@ def min(self): | |
>>> idx.min() | ||
('a', 1) | ||
""" | ||
nv.validate_minmax_axis(axis) | ||
return nanops.nanmin(self.values) | ||
|
||
def argmin(self, axis=None): | ||
def argmin(self, axis=None, skipna=True): | ||
""" | ||
Return a ndarray of the minimum argument indexer. | ||
|
||
Parameters | ||
---------- | ||
axis : {None} | ||
Dummy argument for consistency with Series | ||
skipna : bool, default True | ||
|
||
See Also | ||
-------- | ||
numpy.ndarray.argmin | ||
""" | ||
nv.validate_minmax_axis(axis) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return nanops.nanargmin(self.values) | ||
|
||
def tolist(self): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -258,7 +258,7 @@ def tolist(self): | |
""" | ||
return list(self.astype(object)) | ||
|
||
def min(self, axis=None, *args, **kwargs): | ||
def min(self, axis=None, skipna=True, *args, **kwargs): | ||
""" | ||
Return the minimum value of the Index or minimum along | ||
an axis. | ||
|
@@ -286,7 +286,7 @@ def min(self, axis=None, *args, **kwargs): | |
except ValueError: | ||
return self._na_value | ||
|
||
def argmin(self, axis=None, *args, **kwargs): | ||
def argmin(self, axis=None, skipna=True, *args, **kwargs): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can these inherit the doc-string? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll take a look. Might also get rid of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm the docstrings here and for the corresponding methods in base.IndexOpsMixin are kind of clunky. This may merit a separate look, @datapythonista ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know well what's the class hierarchy and whether makes sense to inherit. But we'll have to add |
||
""" | ||
Returns the indices of the minimum values along an axis. | ||
|
||
|
@@ -309,7 +309,7 @@ def argmin(self, axis=None, *args, **kwargs): | |
i8[mask] = np.iinfo('int64').max | ||
return i8.argmin() | ||
|
||
def max(self, axis=None, *args, **kwargs): | ||
def max(self, axis=None, skipna=True, *args, **kwargs): | ||
""" | ||
Return the maximum value of the Index or maximum along | ||
an axis. | ||
|
@@ -337,7 +337,7 @@ def max(self, axis=None, *args, **kwargs): | |
except ValueError: | ||
return self._na_value | ||
|
||
def argmax(self, axis=None, *args, **kwargs): | ||
def argmax(self, axis=None, skipna=True, *args, **kwargs): | ||
""" | ||
Returns the indices of the maximum values along an axis. | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -297,12 +297,14 @@ def _minmax(self, meth): | |
|
||
return self._start + self._step * no_steps | ||
|
||
def min(self): | ||
def min(self, axis=None, skipna=True): | ||
"""The minimum value of the RangeIndex""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same. why don't these inherit the doc-string |
||
nv.validate_minmax_axis(axis) | ||
return self._minmax('min') | ||
|
||
def max(self): | ||
def max(self, axis=None, skipna=True): | ||
"""The maximum value of the RangeIndex""" | ||
nv.validate_minmax_axis(axis) | ||
return self._minmax('max') | ||
|
||
def argsort(self, *args, **kwargs): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,14 +6,14 @@ | |
|
||
import numpy as np | ||
|
||
from pandas._libs import lib, tslibs | ||
from pandas._libs import iNaT, lib, tslibs | ||
import pandas.compat as compat | ||
|
||
from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask | ||
from pandas.core.dtypes.common import ( | ||
_get_dtype, is_any_int_dtype, is_bool_dtype, is_complex, is_complex_dtype, | ||
is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_float, | ||
is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype, | ||
is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, | ||
is_float, is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype, | ||
is_object_dtype, is_scalar, is_timedelta64_dtype) | ||
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna | ||
|
||
|
@@ -203,6 +203,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, | |
if necessary copy and mask using the specified fill_value | ||
copy = True will force the copy | ||
""" | ||
orig_values = values | ||
values = com.values_from_object(values) | ||
|
||
if mask is None: | ||
|
@@ -212,6 +213,10 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, | |
mask = isna(values) | ||
|
||
dtype = values.dtype | ||
if is_datetime64tz_dtype(orig_values): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is a very very odd place to do this as no other dtype specific things are here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Semi-agree. Most of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. my point is this is in the wrong place. nanops already handles dtypes just not here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you be more specific as to where you want this? The place where There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so this should go in _view_if_needed (or maybe just remove that function and inline it here is ok There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. did you do this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Haven't yet. view_if_needed is called ~25 lines below this. It isn't obvious to me that it is OK to move it up to here. If it is, then sure, its a one-liner that can be inlined. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes move it, as I said above you have dtype inference in 2 places for no reason. |
||
dtype = orig_values.dtype | ||
|
||
values = getattr(values, 'asi8', values) | ||
dtype_ok = _na_ok_dtype(dtype) | ||
|
||
# get our fill value (in case we need to provide an alternative | ||
|
@@ -261,19 +266,24 @@ def _na_ok_dtype(dtype): | |
|
||
def _view_if_needed(values): | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if is_datetime_or_timedelta_dtype(values): | ||
return values.view(np.int64) | ||
try: | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return values.view(np.int64) | ||
except AttributeError: | ||
# TODO: once DatetimeArray has `view`, get rid of this | ||
return values.asi8 | ||
return values | ||
|
||
|
||
def _wrap_results(result, dtype, fill_value=None): | ||
""" wrap our results if needed """ | ||
|
||
if is_datetime64_dtype(dtype): | ||
if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): | ||
if not isinstance(result, np.ndarray): | ||
tz = getattr(dtype, 'tz', None) | ||
assert not isna(fill_value), "Expected non-null fill_value" | ||
if result == fill_value: | ||
result = np.nan | ||
result = tslibs.Timestamp(result) | ||
result = tslibs.Timestamp(result, tz=tz) | ||
else: | ||
result = result.view(dtype) | ||
elif is_timedelta64_dtype(dtype): | ||
|
@@ -426,7 +436,6 @@ def nansum(values, axis=None, skipna=True, min_count=0, mask=None): | |
return _wrap_results(the_sum, dtype) | ||
|
||
|
||
@disallow('M8') | ||
@bottleneck_switch() | ||
def nanmean(values, axis=None, skipna=True, mask=None): | ||
""" | ||
|
@@ -457,7 +466,8 @@ def nanmean(values, axis=None, skipna=True, mask=None): | |
values, skipna, 0, mask=mask) | ||
dtype_sum = dtype_max | ||
dtype_count = np.float64 | ||
if is_integer_dtype(dtype) or is_timedelta64_dtype(dtype): | ||
if (is_integer_dtype(dtype) or is_timedelta64_dtype(dtype) or | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype)): | ||
dtype_sum = np.float64 | ||
elif is_float_dtype(dtype): | ||
dtype_sum = dtype | ||
|
@@ -466,14 +476,20 @@ def nanmean(values, axis=None, skipna=True, mask=None): | |
the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum)) | ||
|
||
if axis is not None and getattr(the_sum, 'ndim', False): | ||
the_mean = the_sum / count | ||
with np.errstate(all="ignore"): | ||
# suppress division by zero warnings | ||
the_mean = the_sum / count | ||
ct_mask = count == 0 | ||
if ct_mask.any(): | ||
the_mean[ct_mask] = np.nan | ||
else: | ||
the_mean = the_sum / count if count > 0 else np.nan | ||
|
||
return _wrap_results(the_mean, dtype) | ||
fill_value = None | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): | ||
fill_value = iNaT | ||
|
||
return _wrap_results(the_mean, dtype, fill_value=fill_value) | ||
|
||
|
||
@disallow('M8') | ||
|
Uh oh!
There was an error while loading. Please reload this page.