-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
REF: don't set ndarray.data in libreduction #34997
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4c5eddd
c632c9f
9e64be3
42649fb
47121dd
1decb3e
57c5dd3
a358463
ffa7ad7
e5e98d4
408db5a
d3493cf
75a805a
9f61070
2d10f6e
3e20187
51205a5
f453c5b
2ae2124
98a91a3
5f73b03
065fc69
c230f72
bf2e171
ba48381
e52db7d
972359f
28f6ca5
0aa2a54
0164b8a
48b5847
4abfaea
3b4be02
fb18f47
d26090d
97ed706
0569e29
7b2d437
7e90686
0d2b936
2bcc156
55bdb16
40008d0
6f2ca92
e766895
65407bc
238de4c
13c0dd3
435a1d0
0d28752
29c0bc2
c67b707
b8181f4
88b5e10
f9ce579
911e997
4480b4a
c2a0eac
366f63c
8631f2e
66b3b5a
4550cf1
7db9d22
172c626
f895c6a
bb5b86a
de2a1dc
d9de663
7cb1421
f796140
c962e70
22b547b
a155986
5aa96dd
81c5802
a56c6af
4a0152e
3aed293
4c9add8
11643bc
edd802f
9339b80
070481c
e8d42b0
c20f2cd
9220944
4193e03
816f2fc
dfb3c10
1a318ef
4480c67
865cb8b
a4d75da
7939eae
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -74,7 +74,14 @@ | |
get_groupby, | ||
group_selection_context, | ||
) | ||
from pandas.core.indexes.api import Index, MultiIndex, all_indexes_same | ||
from pandas.core.indexes.api import ( | ||
DatetimeIndex, | ||
Index, | ||
MultiIndex, | ||
PeriodIndex, | ||
TimedeltaIndex, | ||
all_indexes_same, | ||
) | ||
import pandas.core.indexes.base as ibase | ||
from pandas.core.internals import BlockManager | ||
from pandas.core.series import Series | ||
|
@@ -256,16 +263,25 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) | |
if self.grouper.nkeys > 1: | ||
return self._python_agg_general(func, *args, **kwargs) | ||
|
||
try: | ||
return self._python_agg_general(func, *args, **kwargs) | ||
except (ValueError, KeyError): | ||
# TODO: KeyError is raised in _python_agg_general, | ||
# see see test_groupby.test_basic | ||
result = self._aggregate_named(func, *args, **kwargs) | ||
if isinstance( | ||
self._selected_obj.index, (DatetimeIndex, TimedeltaIndex, PeriodIndex) | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. this is the one place where i'm not using
1.5) in create_series_with_explicit_dtype would need to change dtype_if_empty=object to dtype_if_empty=obj.dtype (which im not 100% sure about)
|
||
): | ||
# using _python_agg_general would end up incorrectly patching | ||
# _index_data in reduction.pyx | ||
result = self._aggregate_maybe_named(func, *args, **kwargs) | ||
else: | ||
try: | ||
return self._python_agg_general(func, *args, **kwargs) | ||
except (ValueError, KeyError): | ||
# TODO: KeyError is raised in _python_agg_general, | ||
# see see test_groupby.test_basic | ||
result = self._aggregate_maybe_named(func, *args, **kwargs) | ||
|
||
index = Index(sorted(result), name=self.grouper.names[0]) | ||
# name setting -> test_metadata_propagation_indiv | ||
index = self.grouper.result_index | ||
obj = self._selected_obj | ||
ret = create_series_with_explicit_dtype( | ||
result, index=index, dtype_if_empty=object | ||
result, index=index, dtype_if_empty=object, name=obj.name | ||
) | ||
|
||
if not self.as_index: # pragma: no cover | ||
|
@@ -469,14 +485,34 @@ def _get_index() -> Index: | |
) | ||
return self._reindex_output(result) | ||
|
||
def _aggregate_named(self, func, *args, **kwargs): | ||
def _aggregate_maybe_named(self, func, *args, **kwargs): | ||
""" | ||
Try the named-aggregator first, then unnamed, which better matches | ||
what libreduction does. | ||
""" | ||
try: | ||
return self._aggregate_named(func, *args, named=True, **kwargs) | ||
except KeyError: | ||
return self._aggregate_named(func, *args, named=False, **kwargs) | ||
|
||
def _aggregate_named(self, func, *args, named: bool = True, **kwargs): | ||
result = {} | ||
|
||
for name, group in self: | ||
group.name = name | ||
for name, group in self: # TODO: could we have duplicate names? | ||
if named: | ||
group.name = name | ||
|
||
output = func(group, *args, **kwargs) | ||
if isinstance(output, (Series, Index, np.ndarray)): | ||
raise ValueError("Must produce aggregated value") | ||
if ( | ||
isinstance(output, Series) | ||
and len(output) == 1 | ||
and name in output.index | ||
): | ||
# FIXME: kludge for test_resampler_grouper.test_apply | ||
output = output.iloc[0] | ||
else: | ||
raise ValueError("Must produce aggregated value") | ||
result[name] = output | ||
|
||
return result | ||
|
Uh oh!
There was an error while loading. Please reload this page.