Skip to content

Commit b5984f9

Browse files
committed
merge in master
2 parents abe7453 + db062da commit b5984f9

File tree

28 files changed

+480
-406
lines changed

28 files changed

+480
-406
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
language: python
2-
python: 3.5
2+
python: 3.7
33

44
# To turn off cached cython files and compiler cache
55
# set NOCACHE-true

doc/source/whatsnew/v1.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -766,6 +766,7 @@ Numeric
766766
- Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be cast to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`)
767767
- Bug in :meth:`Series.interpolate` where using ``method="index"`` with an unsorted index would return incorrect results (:issue:`21037`)
768768
- Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`)
769+
- Bug in :class:`DataFrame` cumulative operations (e.g. cumsum, cummax) incorrectly casting to object-dtype (:issue:`19296`)
769770

770771
Conversion
771772
^^^^^^^^^^

pandas/core/arrays/boolean.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,7 @@ def astype(self, dtype, copy=True):
524524
na_value = np.nan
525525
# coerce
526526
data = self._coerce_to_ndarray(na_value=na_value)
527-
return astype_nansafe(data, dtype, copy=None)
527+
return astype_nansafe(data, dtype, copy=False)
528528

529529
def value_counts(self, dropna=True):
530530
"""
@@ -755,9 +755,8 @@ def logical_method(self, other):
755755

756756
if other_is_scalar and not (other is libmissing.NA or lib.is_bool(other)):
757757
raise TypeError(
758-
"'other' should be pandas.NA or a bool. Got {} instead.".format(
759-
type(other).__name__
760-
)
758+
"'other' should be pandas.NA or a bool. "
759+
f"Got {type(other).__name__} instead."
761760
)
762761

763762
if not other_is_scalar and len(self) != len(other):
@@ -772,7 +771,7 @@ def logical_method(self, other):
772771

773772
return BooleanArray(result, mask)
774773

775-
name = "__{name}__".format(name=op.__name__)
774+
name = f"__{op.__name__}__"
776775
return set_function_name(logical_method, name, cls)
777776

778777
@classmethod
@@ -819,7 +818,7 @@ def cmp_method(self, other):
819818

820819
return BooleanArray(result, mask, copy=False)
821820

822-
name = "__{name}__".format(name=op.__name__)
821+
name = f"__{op.__name__}__"  # dunder name needs the trailing "__" too, matching the old "__{name}__" format
823822
return set_function_name(cmp_method, name, cls)
824823

825824
def _reduce(self, name, skipna=True, **kwargs):
@@ -922,7 +921,7 @@ def boolean_arithmetic_method(self, other):
922921

923922
return self._maybe_mask_result(result, mask, other, op_name)
924923

925-
name = "__{name}__".format(name=op_name)
924+
name = f"__{op_name}__"
926925
return set_function_name(boolean_arithmetic_method, name, cls)
927926

928927

pandas/core/arrays/integer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,7 @@ def astype(self, dtype, copy=True):
546546

547547
# coerce
548548
data = self._coerce_to_ndarray()
549-
return astype_nansafe(data, dtype, copy=None)
549+
return astype_nansafe(data, dtype, copy=False)
550550

551551
@property
552552
def _ndarray_values(self) -> np.ndarray:

pandas/core/computation/engines.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import abc
6+
from typing import Dict, Type
67

78
from pandas.core.computation.align import align_terms, reconstruct_object
89
from pandas.core.computation.ops import _mathops, _reductions
@@ -53,7 +54,7 @@ def convert(self) -> str:
5354
"""
5455
return printing.pprint_thing(self.expr)
5556

56-
def evaluate(self):
57+
def evaluate(self) -> object:
5758
"""
5859
Run the engine on the expression.
5960
@@ -62,7 +63,7 @@ def evaluate(self):
6263
6364
Returns
6465
-------
65-
obj : object
66+
object
6667
The result of the passed expression.
6768
"""
6869
if not self._is_aligned:
@@ -101,12 +102,6 @@ class NumExprEngine(AbstractEngine):
101102

102103
has_neg_frac = True
103104

104-
def __init__(self, expr):
105-
super().__init__(expr)
106-
107-
def convert(self) -> str:
108-
return str(super().convert())
109-
110105
def _evaluate(self):
111106
import numexpr as ne
112107

@@ -128,14 +123,14 @@ class PythonEngine(AbstractEngine):
128123

129124
has_neg_frac = False
130125

131-
def __init__(self, expr):
132-
super().__init__(expr)
133-
134126
def evaluate(self):
135127
return self.expr()
136128

137-
def _evaluate(self):
129+
def _evaluate(self) -> None:
138130
pass
139131

140132

141-
_engines = {"numexpr": NumExprEngine, "python": PythonEngine}
133+
_engines: Dict[str, Type[AbstractEngine]] = {
134+
"numexpr": NumExprEngine,
135+
"python": PythonEngine,
136+
}

pandas/core/computation/eval.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"""
66

77
import tokenize
8+
from typing import Optional
89
import warnings
910

1011
from pandas._libs.lib import _no_default
@@ -17,7 +18,7 @@
1718
from pandas.io.formats.printing import pprint_thing
1819

1920

20-
def _check_engine(engine):
21+
def _check_engine(engine: Optional[str]) -> str:
2122
"""
2223
Make sure a valid engine is passed.
2324
@@ -168,7 +169,7 @@ def _check_for_locals(expr: str, stack_level: int, parser: str):
168169
def eval(
169170
expr,
170171
parser="pandas",
171-
engine=None,
172+
engine: Optional[str] = None,
172173
truediv=_no_default,
173174
local_dict=None,
174175
global_dict=None,

pandas/core/dtypes/cast.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from pandas._libs import lib, tslib, tslibs
88
from pandas._libs.tslibs import NaT, OutOfBoundsDatetime, Period, iNaT
9+
from pandas._libs.tslibs.timezones import tz_compare
910
from pandas.util._validators import validate_bool_kwarg
1011

1112
from .common import (
@@ -409,6 +410,14 @@ def maybe_promote(dtype, fill_value=np.nan):
409410
elif is_datetime64tz_dtype(dtype):
410411
if isna(fill_value):
411412
fill_value = NaT
413+
elif not isinstance(fill_value, datetime):
414+
dtype = np.dtype(np.object_)
415+
elif fill_value.tzinfo is None:
416+
dtype = np.dtype(np.object_)
417+
elif not tz_compare(fill_value.tzinfo, dtype.tz):
418+
# TODO: sure we want to cast here?
419+
dtype = np.dtype(np.object_)
420+
412421
elif is_extension_array_dtype(dtype) and isna(fill_value):
413422
fill_value = dtype.na_value
414423

pandas/core/frame.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4014,16 +4014,14 @@ def fillna(
40144014
inplace=False,
40154015
limit=None,
40164016
downcast=None,
4017-
**kwargs,
4018-
):
4017+
) -> Optional["DataFrame"]:
40194018
return super().fillna(
40204019
value=value,
40214020
method=method,
40224021
axis=axis,
40234022
inplace=inplace,
40244023
limit=limit,
40254024
downcast=downcast,
4026-
**kwargs,
40274025
)
40284026

40294027
@Appender(_shared_docs["replace"] % _shared_doc_kwargs)
@@ -5148,7 +5146,7 @@ def reorder_levels(self, order, axis=0):
51485146
# Arithmetic / combination related
51495147

51505148
def _combine_frame(self, other, func, fill_value=None, level=None):
5151-
this, other = self.align(other, join="outer", level=level, copy=False)
5149+
# at this point we have `self._indexed_same(other)`
51525150

51535151
if fill_value is None:
51545152
# since _arith_op may be called in a loop, avoid function call
@@ -5164,14 +5162,15 @@ def _arith_op(left, right):
51645162
left, right = ops.fill_binop(left, right, fill_value)
51655163
return func(left, right)
51665164

5167-
if ops.should_series_dispatch(this, other, func):
5165+
if ops.should_series_dispatch(self, other, func):
51685166
# iterate over columns
5169-
new_data = ops.dispatch_to_series(this, other, _arith_op)
5167+
new_data = ops.dispatch_to_series(self, other, _arith_op)
51705168
else:
51715169
with np.errstate(all="ignore"):
5172-
res_values = _arith_op(this.values, other.values)
5173-
new_data = dispatch_fill_zeros(func, this.values, other.values, res_values)
5174-
return this._construct_result(new_data)
5170+
res_values = _arith_op(self.values, other.values)
5171+
new_data = dispatch_fill_zeros(func, self.values, other.values, res_values)
5172+
5173+
return new_data
51755174

51765175
def _combine_match_index(self, other, func):
51775176
# at this point we have `self.index.equals(other.index)`

pandas/core/generic.py

Lines changed: 57 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -11086,44 +11086,66 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs):
1108611086
else:
1108711087
axis = self._get_axis_number(axis)
1108811088

11089-
y = com.values_from_object(self).copy()
11090-
d = self._construct_axes_dict()
11091-
d["copy"] = False
11089+
if axis == 1:
11090+
return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T
11091+
11092+
def na_accum_func(blk_values):
11093+
# We will be applying this function to block values
11094+
if blk_values.dtype.kind in ["m", "M"]:
11095+
# numpy 1.18 started sorting NaTs at the end instead of beginning,
11096+
# so we need to work around to maintain backwards-consistency.
11097+
orig_dtype = blk_values.dtype
11098+
11099+
# We need to define mask before masking NaTs
11100+
mask = isna(blk_values)
11101+
11102+
if accum_func == np.minimum.accumulate:
11103+
# Note: the accum_func comparison fails as an "is" comparison
11104+
y = blk_values.view("i8")
11105+
y[mask] = np.iinfo(np.int64).max
11106+
changed = True
11107+
else:
11108+
y = blk_values
11109+
changed = False
11110+
11111+
result = accum_func(y.view("i8"), axis)
11112+
if skipna:
11113+
np.putmask(result, mask, iNaT)
11114+
elif accum_func == np.minimum.accumulate:
11115+
# Restore NaTs that we masked previously
11116+
nz = (~np.asarray(mask)).nonzero()[0]
11117+
if len(nz):
11118+
# everything up to the first non-na entry stays NaT
11119+
result[: nz[0]] = iNaT
11120+
11121+
if changed:
11122+
# restore NaT elements
11123+
y[mask] = iNaT # TODO: could try/finally for this?
11124+
11125+
if isinstance(blk_values, np.ndarray):
11126+
result = result.view(orig_dtype)
11127+
else:
11128+
# DatetimeArray
11129+
result = type(blk_values)._from_sequence(result, dtype=orig_dtype)
11130+
11131+
elif skipna and not issubclass(
11132+
blk_values.dtype.type, (np.integer, np.bool_)
11133+
):
11134+
vals = blk_values.copy().T
11135+
mask = isna(vals)
11136+
np.putmask(vals, mask, mask_a)
11137+
result = accum_func(vals, axis)
11138+
np.putmask(result, mask, mask_b)
11139+
else:
11140+
result = accum_func(blk_values.T, axis)
1109211141

11093-
if issubclass(y.dtype.type, (np.datetime64, np.timedelta64)):
11094-
# numpy 1.18 started sorting NaTs at the end instead of beginning,
11095-
# so we need to work around to maintain backwards-consistency.
11096-
orig_dtype = y.dtype
11097-
if accum_func == np.minimum.accumulate:
11098-
# Note: the accum_func comparison fails as an "is" comparison
11099-
# Note that "y" is always a copy, so we can safely modify it
11100-
mask = isna(self)
11101-
y = y.view("i8")
11102-
y[mask] = np.iinfo(np.int64).max
11103-
11104-
result = accum_func(y.view("i8"), axis).view(orig_dtype)
11105-
if skipna:
11106-
mask = isna(self)
11107-
np.putmask(result, mask, iNaT)
11108-
elif accum_func == np.minimum.accumulate:
11109-
# Restore NaTs that we masked previously
11110-
nz = (~np.asarray(mask)).nonzero()[0]
11111-
if len(nz):
11112-
# everything up to the first non-na entry stays NaT
11113-
result[: nz[0]] = iNaT
11142+
# transpose back for ndarray, not for EA
11143+
return result.T if hasattr(result, "T") else result
1111411144

11115-
if self.ndim == 1:
11116-
# restore dt64tz dtype
11117-
d["dtype"] = self.dtype
11118-
11119-
elif skipna and not issubclass(y.dtype.type, (np.integer, np.bool_)):
11120-
mask = isna(self)
11121-
np.putmask(y, mask, mask_a)
11122-
result = accum_func(y, axis)
11123-
np.putmask(result, mask, mask_b)
11124-
else:
11125-
result = accum_func(y, axis)
11145+
result = self._data.apply(na_accum_func)
1112611146

11147+
d = self._construct_axes_dict()
11148+
d["copy"] = False
1112711149
return self._constructor(result, **d).__finalize__(self)
1112811150

1112911151
return set_function_name(cum_func, name, cls)

0 commit comments

Comments
 (0)