-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
standardize signature for Index reductions, implement nanmean for datetime64 dtypes #24293
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
f38ffe1
04cf1f7
3efab79
a652439
ce760d3
2380af6
4157f0b
50642ae
0baedf3
6e0e69f
e2c301b
fce67ac
4b4979f
82b8cdf
ffe6ada
7f7693f
07c3102
6c93410
4620c56
4777b75
aa4028a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -957,10 +957,16 @@ def _ndarray_values(self): | |
def empty(self): | ||
return not self.size | ||
|
||
def max(self): | ||
def max(self, axis=None, skipna=True): | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
Return the maximum value of the Index. | ||
|
||
Parameters | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also we have similar things in pandas/core/base, these should change as well. |
||
---------- | ||
axis : {None} | ||
Dummy argument for consistency with Series | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not sure we use this terminology here
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think @jbrockmendel is listing the set of possible values, which we do use. In this case, that set is just the single value I think
is clearer. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @TomAugspurger verbiage would be good |
||
skipna : bool, default True | ||
|
||
Returns | ||
------- | ||
scalar | ||
|
@@ -988,22 +994,36 @@ def max(self): | |
>>> idx.max() | ||
('b', 2) | ||
""" | ||
return nanops.nanmax(self.values) | ||
nv.validate_minmax_axis(axis) | ||
return nanops.nanmax(self._values, skipna=skipna) | ||
|
||
def argmax(self, axis=None): | ||
def argmax(self, axis=None, skipna=True): | ||
""" | ||
Return a ndarray of the maximum argument indexer. | ||
|
||
Parameters | ||
---------- | ||
axis : {None} | ||
Dummy argument for consistency with Series | ||
skipna : bool, default True | ||
|
||
See Also | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should be consisten about the See Also, e.g. make sure in all, and add a refernce to the Series.min function as well (as appropriate) |
||
-------- | ||
numpy.ndarray.argmax | ||
""" | ||
return nanops.nanargmax(self.values) | ||
nv.validate_minmax_axis(axis) | ||
return nanops.nanargmax(self._values, skipna=skipna) | ||
|
||
def min(self): | ||
def min(self, axis=None, skipna=True): | ||
""" | ||
Return the minimum value of the Index. | ||
|
||
Parameters | ||
---------- | ||
axis : {None} | ||
Dummy argument for consistency with Series | ||
skipna : bool, default True | ||
|
||
Returns | ||
------- | ||
scalar | ||
|
@@ -1031,17 +1051,25 @@ def min(self): | |
>>> idx.min() | ||
('a', 1) | ||
""" | ||
return nanops.nanmin(self.values) | ||
nv.validate_minmax_axis(axis) | ||
return nanops.nanmin(self._values, skipna=skipna) | ||
|
||
def argmin(self, axis=None): | ||
def argmin(self, axis=None, skipna=True): | ||
""" | ||
Return a ndarray of the minimum argument indexer. | ||
|
||
Parameters | ||
---------- | ||
axis : {None} | ||
Dummy argument for consistency with Series | ||
skipna : bool, default True | ||
|
||
See Also | ||
-------- | ||
numpy.ndarray.argmin | ||
""" | ||
return nanops.nanargmin(self.values) | ||
nv.validate_minmax_axis(axis) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return nanops.nanargmin(self._values, skipna=skipna) | ||
|
||
def tolist(self): | ||
""" | ||
|
@@ -1088,7 +1116,7 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, | |
if func is None: | ||
raise TypeError("{klass} cannot perform the operation {op}".format( | ||
klass=self.__class__.__name__, op=name)) | ||
return func(**kwds) | ||
return func(skipna=skipna, **kwds) | ||
|
||
def _map_values(self, mapper, na_action=None): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,6 +40,7 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): | |
# override DatetimeLikeArrayMixin method | ||
copy = Index.copy | ||
unique = Index.unique | ||
view = Index.view | ||
|
||
# DatetimeLikeArrayMixin assumes subclasses are mutable, so these are | ||
# properties there. They can be made into cache_readonly for Index | ||
|
@@ -244,7 +245,7 @@ def tolist(self): | |
""" | ||
return list(self.astype(object)) | ||
|
||
def min(self, axis=None, *args, **kwargs): | ||
def min(self, axis=None, skipna=True, *args, **kwargs): | ||
""" | ||
Return the minimum value of the Index or minimum along | ||
an axis. | ||
|
@@ -256,23 +257,28 @@ def min(self, axis=None, *args, **kwargs): | |
nv.validate_min(args, kwargs) | ||
nv.validate_minmax_axis(axis) | ||
|
||
try: | ||
i8 = self.asi8 | ||
if not len(self): | ||
return self._na_value | ||
|
||
i8 = self.asi8 | ||
try: | ||
# quick check | ||
if len(i8) and self.is_monotonic: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these quick checks make sense for Index as they are immutable, but may not make much sense here (but i guess can evaluate later) |
||
if i8[0] != iNaT: | ||
return self._box_func(i8[0]) | ||
|
||
if self.hasnans: | ||
min_stamp = self[~self._isnan].asi8.min() | ||
if skipna: | ||
min_stamp = self[~self._isnan].asi8.min() | ||
else: | ||
return self._na_value | ||
else: | ||
min_stamp = i8.min() | ||
return self._box_func(min_stamp) | ||
except ValueError: | ||
return self._na_value | ||
|
||
def argmin(self, axis=None, *args, **kwargs): | ||
def argmin(self, axis=None, skipna=True, *args, **kwargs): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can these inherit the doc-string? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll take a look. Might also get rid of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm the docstrings here and for the corresponding methods in base.IndexOpsMixin are kind of clunky. This may merit a separate look, @datapythonista ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know well what's the class hierarchy and whether makes sense to inherit. But we'll have to add |
||
""" | ||
Returns the indices of the minimum values along an axis. | ||
|
||
|
@@ -289,13 +295,13 @@ def argmin(self, axis=None, *args, **kwargs): | |
i8 = self.asi8 | ||
if self.hasnans: | ||
mask = self._isnan | ||
if mask.all(): | ||
if mask.all() or not skipna: | ||
return -1 | ||
i8 = i8.copy() | ||
i8[mask] = np.iinfo('int64').max | ||
return i8.argmin() | ||
|
||
def max(self, axis=None, *args, **kwargs): | ||
def max(self, axis=None, skipna=True, *args, **kwargs): | ||
""" | ||
Return the maximum value of the Index or maximum along | ||
an axis. | ||
|
@@ -307,23 +313,28 @@ def max(self, axis=None, *args, **kwargs): | |
nv.validate_max(args, kwargs) | ||
nv.validate_minmax_axis(axis) | ||
|
||
try: | ||
i8 = self.asi8 | ||
if not len(self): | ||
return self._na_value | ||
|
||
i8 = self.asi8 | ||
try: | ||
# quick check | ||
if len(i8) and self.is_monotonic: | ||
if i8[-1] != iNaT: | ||
return self._box_func(i8[-1]) | ||
|
||
if self.hasnans: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same |
||
max_stamp = self[~self._isnan].asi8.max() | ||
if skipna: | ||
max_stamp = self[~self._isnan].asi8.max() | ||
else: | ||
return self._na_value | ||
else: | ||
max_stamp = i8.max() | ||
return self._box_func(max_stamp) | ||
except ValueError: | ||
return self._na_value | ||
|
||
def argmax(self, axis=None, *args, **kwargs): | ||
def argmax(self, axis=None, skipna=True, *args, **kwargs): | ||
""" | ||
Returns the indices of the maximum values along an axis. | ||
|
||
|
@@ -340,7 +351,7 @@ def argmax(self, axis=None, *args, **kwargs): | |
i8 = self.asi8 | ||
if self.hasnans: | ||
mask = self._isnan | ||
if mask.all(): | ||
if mask.all() or not skipna: | ||
return -1 | ||
i8 = i8.copy() | ||
i8[mask] = 0 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -297,12 +297,14 @@ def _minmax(self, meth): | |
|
||
return self._start + self._step * no_steps | ||
|
||
def min(self): | ||
def min(self, axis=None, skipna=True): | ||
"""The minimum value of the RangeIndex""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same. why don't these inherit the doc-string |
||
nv.validate_minmax_axis(axis) | ||
return self._minmax('min') | ||
|
||
def max(self): | ||
def max(self, axis=None, skipna=True): | ||
"""The maximum value of the RangeIndex""" | ||
nv.validate_minmax_axis(axis) | ||
return self._minmax('max') | ||
|
||
def argsort(self, *args, **kwargs): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,14 +6,14 @@ | |
|
||
import numpy as np | ||
|
||
from pandas._libs import lib, tslibs | ||
from pandas._libs import iNaT, lib, tslibs | ||
import pandas.compat as compat | ||
|
||
from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask | ||
from pandas.core.dtypes.common import ( | ||
_get_dtype, is_any_int_dtype, is_bool_dtype, is_complex, is_complex_dtype, | ||
is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_float, | ||
is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype, | ||
is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, | ||
is_float, is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype, | ||
is_object_dtype, is_scalar, is_timedelta64_dtype) | ||
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna | ||
|
||
|
@@ -203,6 +203,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, | |
if necessary copy and mask using the specified fill_value | ||
copy = True will force the copy | ||
""" | ||
orig_values = values | ||
values = com.values_from_object(values) | ||
|
||
if mask is None: | ||
|
@@ -212,6 +213,10 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, | |
mask = isna(values) | ||
|
||
dtype = values.dtype | ||
if is_datetime64tz_dtype(orig_values): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is a very very odd place to do this as no other dtype specific things are here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Semi-agree. Most of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. my point is this is in the wrong place. nanops already handles dtypes just not here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you be more specific as to where you want this? The place where There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so this should go in _view_if_needed (or maybe just remove that function and inline it here is ok There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. did you do this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Haven't yet. view_if_needed is called ~25 lines below this. It isn't obvious to me that it is OK to move it up to here. If it is, then sure, its a one-liner that can be inlined. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes move it, as I said above you have dtype inference in 2 places for no reason. |
||
dtype = orig_values.dtype | ||
|
||
values = getattr(values, 'asi8', values) | ||
dtype_ok = _na_ok_dtype(dtype) | ||
|
||
# get our fill value (in case we need to provide an alternative | ||
|
@@ -268,12 +273,16 @@ def _view_if_needed(values): | |
def _wrap_results(result, dtype, fill_value=None): | ||
""" wrap our results if needed """ | ||
|
||
if is_datetime64_dtype(dtype): | ||
if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): | ||
if fill_value is None: | ||
# GH#24293 | ||
fill_value = iNaT | ||
if not isinstance(result, np.ndarray): | ||
tz = getattr(dtype, 'tz', None) | ||
assert not isna(fill_value), "Expected non-null fill_value" | ||
if result == fill_value: | ||
result = np.nan | ||
result = tslibs.Timestamp(result) | ||
result = tslibs.Timestamp(result, tz=tz) | ||
else: | ||
result = result.view(dtype) | ||
elif is_timedelta64_dtype(dtype): | ||
|
@@ -426,7 +435,6 @@ def nansum(values, axis=None, skipna=True, min_count=0, mask=None): | |
return _wrap_results(the_sum, dtype) | ||
|
||
|
||
@disallow('M8') | ||
@bottleneck_switch() | ||
def nanmean(values, axis=None, skipna=True, mask=None): | ||
""" | ||
|
@@ -457,7 +465,8 @@ def nanmean(values, axis=None, skipna=True, mask=None): | |
values, skipna, 0, mask=mask) | ||
dtype_sum = dtype_max | ||
dtype_count = np.float64 | ||
if is_integer_dtype(dtype) or is_timedelta64_dtype(dtype): | ||
if (is_integer_dtype(dtype) or is_timedelta64_dtype(dtype) or | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype)): | ||
dtype_sum = np.float64 | ||
elif is_float_dtype(dtype): | ||
dtype_sum = dtype | ||
|
@@ -466,7 +475,9 @@ def nanmean(values, axis=None, skipna=True, mask=None): | |
the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum)) | ||
|
||
if axis is not None and getattr(the_sum, 'ndim', False): | ||
the_mean = the_sum / count | ||
with np.errstate(all="ignore"): | ||
# suppress division by zero warnings | ||
the_mean = the_sum / count | ||
ct_mask = count == 0 | ||
if ct_mask.any(): | ||
the_mean[ct_mask] = np.nan | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,9 +17,10 @@ | |
|
||
from pandas.core.dtypes.common import ( | ||
_is_unorderable_exception, ensure_platform_int, is_bool, | ||
is_categorical_dtype, is_datetime64tz_dtype, is_datetimelike, is_dict_like, | ||
is_extension_array_dtype, is_extension_type, is_hashable, is_integer, | ||
is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype) | ||
is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, | ||
is_datetimelike, is_dict_like, is_extension_array_dtype, is_extension_type, | ||
is_hashable, is_integer, is_iterator, is_list_like, is_scalar, | ||
is_string_like, is_timedelta64_dtype) | ||
from pandas.core.dtypes.generic import ( | ||
ABCDataFrame, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries) | ||
from pandas.core.dtypes.missing import ( | ||
|
@@ -3493,6 +3494,9 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, | |
# dispatch to ExtensionArray interface | ||
if isinstance(delegate, ExtensionArray): | ||
return delegate._reduce(name, skipna=skipna, **kwds) | ||
elif is_datetime64_dtype(delegate): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this be first? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it isn't clear to me that it would make a difference |
||
# use DatetimeIndex implementation to handle skipna correctly | ||
delegate = DatetimeIndex(delegate) | ||
|
||
# dispatch to numpy arrays | ||
elif isinstance(delegate, np.ndarray): | ||
|
Uh oh!
There was an error while loading. Please reload this page.