Closed
Description
In DataFrame._reduce,
are there reasons not to have try/except
clauses around this conversion?
Lines 9581 to 9584 in 029907c
Like you have here:
Lines 9609 to 9613 in 029907c
Because then you have issues like this, which are very common in groupby
(I mean empty DataFrames are common):
pd.DataFrame(columns=["Duration"], dtype="timedelta64[ns]").sum(skipna=False)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-2442-dd5151a2feed> in <module>
----> 1 pd.DataFrame(columns=["Duration"], dtype="timedelta64[ns]").sum(skipna=False)
.../lib/python3.8/site-packages/pandas/core/generic.py in sum(self, axis, skipna, level, numeric_only, min_count, **kwargs)
11069 **kwargs,
11070 ):
> 11071 return NDFrame.sum(
11072 self, axis, skipna, level, numeric_only, min_count, **kwargs
11073 )
.../lib/python3.8/site-packages/pandas/core/generic.py in sum(self, axis, skipna, level, numeric_only, min_count, **kwargs)
10789 **kwargs,
10790 ):
> 10791 return self._min_count_stat_function(
10792 "sum", nanops.nansum, axis, skipna, level, numeric_only, min_count, **kwargs
10793 )
.../lib/python3.8/site-packages/pandas/core/generic.py in _min_count_stat_function(self, name, func, axis, skipna, level, numeric_only, min_count, **kwargs)
10771 name, axis=axis, level=level, skipna=skipna, min_count=min_count
10772 )
> 10773 return self._reduce(
10774 func,
10775 name=name,
.../lib/python3.8/site-packages/pandas/core/frame.py in _reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)
8855 # Even if we are object dtype, follow numpy and return
8856 # float64, see test_apply_funcs_over_empty
-> 8857 out = out.astype(np.float64)
8858 return out
8859
.../lib/python3.8/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors)
5875 else:
5876 # else, only a single dtype is given
-> 5877 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
5878 return self._constructor(new_data).__finalize__(self, method="astype")
5879
.../lib/python3.8/site-packages/pandas/core/internals/managers.py in astype(self, dtype, copy, errors)
629 self, dtype, copy: bool = False, errors: str = "raise"
630 ) -> "BlockManager":
--> 631 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
632
633 def convert(
.../lib/python3.8/site-packages/pandas/core/internals/managers.py in apply(self, f, align_keys, ignore_failures, **kwargs)
425 applied = b.apply(f, **kwargs)
426 else:
--> 427 applied = getattr(b, f)(**kwargs)
428 except (TypeError, NotImplementedError):
429 if not ignore_failures:
.../lib/python3.8/site-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors)
671 vals1d = values.ravel()
672 try:
--> 673 values = astype_nansafe(vals1d, dtype, copy=True)
674 except (ValueError, TypeError):
675 # e.g. astype_nansafe can fail on object-dtype of strings
.../lib/python3.8/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
1061 return arr.astype(TD64NS_DTYPE, copy=copy)
1062
-> 1063 raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")
1064
1065 elif np.issubdtype(arr.dtype, np.floating) and np.issubdtype(dtype, np.integer):
TypeError: cannot astype a timedelta from [timedelta64[ns]] to [float64]
And I don't think one should depend on the undocumented behavior of a negative min_count
to bypass that coercion while guaranteeing the same output as if min_count
were 0.
Line 9560 in 029907c
Especially with code like:
Line 1435 in 029907c
This Stack Overflow question, "Sum Empty DataFrame without Converting to Incompatible Type", goes into more detail.