Skip to content

Commit f7590e6

Browse files
authored
BUG/df.agg-with-df-with-missing-values-results-in-IndexError (#58864)
* fix * improve and fix bug entry * update --------- Co-authored-by: 121238257 <abeltavares@users.noreply.github.com>
1 parent 9e7abc8 commit f7590e6

File tree

3 files changed

+32
-3
lines changed

3 files changed

+32
-3
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,7 @@ Datetimelike
456456
- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
457457
- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`)
458458
- Bug in :func:`tseries.api.guess_datetime_format` failing to infer the time format when "%Y" == "%H%M" (:issue:`57452`)
459+
- Bug in :meth:`DataFrame.agg` raising ``IndexError`` when the :class:`DataFrame` contains missing values (:issue:`58810`)
459460
- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` not raising on custom business day frequencies bigger than "1C" (:issue:`58664`)
460461
- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`)
461462
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)

pandas/core/apply.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1850,11 +1850,13 @@ def relabel_result(
18501850
com.get_callable_name(f) if not isinstance(f, str) else f for f in fun
18511851
]
18521852
col_idx_order = Index(s.index).get_indexer(fun)
1853-
s = s.iloc[col_idx_order]
1854-
1853+
valid_idx = col_idx_order != -1
1854+
if valid_idx.any():
1855+
s = s.iloc[col_idx_order[valid_idx]]
18551856
# assign the new user-provided "named aggregation" as index names, and reindex
18561857
# it based on the whole user-provided names.
1857-
s.index = reordered_indexes[idx : idx + len(fun)]
1858+
if not s.empty:
1859+
s.index = reordered_indexes[idx : idx + len(fun)]
18581860
reordered_result_in_dict[col] = s.reindex(columns)
18591861
idx = idx + len(fun)
18601862
return reordered_result_in_dict

pandas/tests/groupby/aggregate/test_aggregate.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,32 @@ def test_agg_ser_multi_key(df):
6262
tm.assert_series_equal(results, expected)
6363

6464

65+
def test_agg_with_missing_values():
66+
# GH#58810
67+
missing_df = DataFrame(
68+
{
69+
"nan": [np.nan, np.nan, np.nan, np.nan],
70+
"na": [pd.NA, pd.NA, pd.NA, pd.NA],
71+
"nat": [pd.NaT, pd.NaT, pd.NaT, pd.NaT],
72+
"none": [None, None, None, None],
73+
"values": [1, 2, 3, 4],
74+
}
75+
)
76+
77+
result = missing_df.agg(x=("nan", "min"), y=("na", "min"), z=("values", "sum"))
78+
79+
expected = DataFrame(
80+
{
81+
"nan": [np.nan, np.nan, np.nan],
82+
"na": [np.nan, np.nan, np.nan],
83+
"values": [np.nan, np.nan, 10.0],
84+
},
85+
index=["x", "y", "z"],
86+
)
87+
88+
tm.assert_frame_equal(result, expected)
89+
90+
6591
def test_groupby_aggregation_mixed_dtype():
6692
# GH 6212
6793
expected = DataFrame(

0 commit comments

Comments
 (0)