Skip to content

Commit 627d46e

Browse files
committed
Merge remote-tracking branch 'upstream/master' into datetime-type-inference
2 parents 5e4cf73 + 4a5f9da commit 627d46e

File tree

9 files changed

+172
-71
lines changed

9 files changed

+172
-71
lines changed

doc/source/whatsnew/v1.1.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ Other enhancements
9999
``df.to_csv(path, compression={'method': 'gzip', 'compresslevel': 1})``
100100
(:issue:`33196`)
101101
- :meth:`~pandas.core.groupby.GroupBy.transform` has gained ``engine`` and ``engine_kwargs`` arguments that support executing functions with ``Numba`` (:issue:`32854`)
102+
- :meth:`~pandas.core.resample.Resampler.interpolate` now supports SciPy interpolation method :class:`scipy.interpolate.CubicSpline` as method ``cubicspline`` (:issue:`33670`)
102103
-
103104

104105
.. ---------------------------------------------------------------------------

pandas/core/arrays/datetimelike.py

Lines changed: 39 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -60,29 +60,24 @@ def _datetimelike_array_cmp(cls, op):
6060
opname = f"__{op.__name__}__"
6161
nat_result = opname == "__ne__"
6262

63-
@unpack_zerodim_and_defer(opname)
64-
def wrapper(self, other):
63+
class InvalidComparison(Exception):
64+
pass
6565

66+
def _validate_comparison_value(self, other):
6667
if isinstance(other, str):
6768
try:
6869
# GH#18435 strings get a pass from tzawareness compat
6970
other = self._scalar_from_string(other)
7071
except ValueError:
7172
# failed to parse as Timestamp/Timedelta/Period
72-
return invalid_comparison(self, other, op)
73+
raise InvalidComparison(other)
7374

7475
if isinstance(other, self._recognized_scalars) or other is NaT:
7576
other = self._scalar_type(other)
7677
self._check_compatible_with(other)
7778

78-
other_i8 = self._unbox_scalar(other)
79-
80-
result = op(self.view("i8"), other_i8)
81-
if isna(other):
82-
result.fill(nat_result)
83-
8479
elif not is_list_like(other):
85-
return invalid_comparison(self, other, op)
80+
raise InvalidComparison(other)
8681

8782
elif len(other) != len(self):
8883
raise ValueError("Lengths must match")
@@ -93,34 +88,50 @@ def wrapper(self, other):
9388
other = np.array(other)
9489

9590
if not isinstance(other, (np.ndarray, type(self))):
96-
return invalid_comparison(self, other, op)
97-
98-
if is_object_dtype(other):
99-
# We have to use comp_method_OBJECT_ARRAY instead of numpy
100-
# comparison otherwise it would fail to raise when
101-
# comparing tz-aware and tz-naive
102-
with np.errstate(all="ignore"):
103-
result = ops.comp_method_OBJECT_ARRAY(
104-
op, self.astype(object), other
105-
)
106-
o_mask = isna(other)
91+
raise InvalidComparison(other)
92+
93+
elif is_object_dtype(other.dtype):
94+
pass
10795

10896
elif not type(self)._is_recognized_dtype(other.dtype):
109-
return invalid_comparison(self, other, op)
97+
raise InvalidComparison(other)
11098

11199
else:
112100
# For PeriodDType this casting is unnecessary
101+
# TODO: use Index to do inference?
113102
other = type(self)._from_sequence(other)
114103
self._check_compatible_with(other)
115104

116-
result = op(self.view("i8"), other.view("i8"))
117-
o_mask = other._isnan
105+
return other
118106

119-
if o_mask.any():
120-
result[o_mask] = nat_result
107+
@unpack_zerodim_and_defer(opname)
108+
def wrapper(self, other):
121109

122-
if self._hasnans:
123-
result[self._isnan] = nat_result
110+
try:
111+
other = _validate_comparison_value(self, other)
112+
except InvalidComparison:
113+
return invalid_comparison(self, other, op)
114+
115+
dtype = getattr(other, "dtype", None)
116+
if is_object_dtype(dtype):
117+
# We have to use comp_method_OBJECT_ARRAY instead of numpy
118+
# comparison otherwise it would fail to raise when
119+
# comparing tz-aware and tz-naive
120+
with np.errstate(all="ignore"):
121+
result = ops.comp_method_OBJECT_ARRAY(op, self.astype(object), other)
122+
return result
123+
124+
if isinstance(other, self._scalar_type) or other is NaT:
125+
other_i8 = self._unbox_scalar(other)
126+
else:
127+
# Then type(other) == type(self)
128+
other_i8 = other.asi8
129+
130+
result = op(self.asi8, other_i8)
131+
132+
o_mask = isna(other)
133+
if self._hasnans | np.any(o_mask):
134+
result[self._isnan | o_mask] = nat_result
124135

125136
return result
126137

pandas/core/frame.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3649,7 +3649,9 @@ def reindexer(value):
36493649
@property
36503650
def _series(self):
36513651
return {
3652-
item: Series(self._mgr.iget(idx), index=self.index, name=item)
3652+
item: Series(
3653+
self._mgr.iget(idx), index=self.index, name=item, fastpath=True
3654+
)
36533655
for idx, item in enumerate(self.columns)
36543656
}
36553657

pandas/core/generic.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6671,9 +6671,9 @@ def replace(
66716671
values of the index. Both 'polynomial' and 'spline' require that
66726672
you also specify an `order` (int), e.g.
66736673
``df.interpolate(method='polynomial', order=5)``.
6674-
* 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima':
6675-
Wrappers around the SciPy interpolation methods of similar
6676-
names. See `Notes`.
6674+
* 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima',
6675+
'cubicspline': Wrappers around the SciPy interpolation methods of
6676+
similar names. See `Notes`.
66776677
* 'from_derivatives': Refers to
66786678
`scipy.interpolate.BPoly.from_derivatives` which
66796679
replaces 'piecewise_polynomial' interpolation method in
@@ -11208,9 +11208,7 @@ def block_accum_func(blk_values):
1120811208

1120911209
result = self._mgr.apply(block_accum_func)
1121011210

11211-
d = self._construct_axes_dict()
11212-
d["copy"] = False
11213-
return self._constructor(result, **d).__finalize__(self, method=name)
11211+
return self._constructor(result).__finalize__(self, method=name)
1121411212

1121511213
return set_function_name(cum_func, name, cls)
1121611214

pandas/core/indexes/datetimelike.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Base and utility classes for tseries type pandas objects.
33
"""
44
from datetime import datetime
5-
from typing import Any, List, Optional, Union
5+
from typing import Any, List, Optional, Union, cast
66

77
import numpy as np
88

@@ -583,6 +583,22 @@ def delete(self, loc):
583583
arr = type(self._data)._simple_new(new_i8s, dtype=self.dtype, freq=freq)
584584
return type(self)._simple_new(arr, name=self.name)
585585

586+
# --------------------------------------------------------------------
587+
# Join/Set Methods
588+
589+
def _wrap_joined_index(self, joined: np.ndarray, other):
590+
assert other.dtype == self.dtype, (other.dtype, self.dtype)
591+
name = get_op_result_name(self, other)
592+
593+
if is_period_dtype(self.dtype):
594+
freq = self.freq
595+
else:
596+
self = cast(DatetimeTimedeltaMixin, self)
597+
freq = self.freq if self._can_fast_union(other) else None
598+
new_data = type(self._data)._simple_new(joined, dtype=self.dtype, freq=freq)
599+
600+
return type(self)._simple_new(new_data, name=name)
601+
586602

587603
class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index):
588604
"""
@@ -878,15 +894,6 @@ def _is_convertible_to_index_for_join(cls, other: Index) -> bool:
878894
return True
879895
return False
880896

881-
def _wrap_joined_index(self, joined: np.ndarray, other):
882-
assert other.dtype == self.dtype, (other.dtype, self.dtype)
883-
name = get_op_result_name(self, other)
884-
885-
freq = self.freq if self._can_fast_union(other) else None
886-
new_data = type(self._data)._simple_new(joined, dtype=self.dtype, freq=freq)
887-
888-
return type(self)._simple_new(new_data, name=name)
889-
890897
# --------------------------------------------------------------------
891898
# List-Like Methods
892899

pandas/core/indexes/period.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,7 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False)
628628
other, how=how, level=level, return_indexers=return_indexers, sort=sort
629629
)
630630

631+
# _assert_can_do_setop ensures we have matching dtype
631632
result = Int64Index.join(
632633
self,
633634
other,
@@ -636,11 +637,7 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False)
636637
return_indexers=return_indexers,
637638
sort=sort,
638639
)
639-
640-
if return_indexers:
641-
result, lidx, ridx = result
642-
return self._apply_meta(result), lidx, ridx
643-
return self._apply_meta(result)
640+
return result
644641

645642
# ------------------------------------------------------------------------
646643
# Set Operation Methods
@@ -719,13 +716,6 @@ def _union(self, other, sort):
719716

720717
# ------------------------------------------------------------------------
721718

722-
def _apply_meta(self, rawarr) -> "PeriodIndex":
723-
if not isinstance(rawarr, PeriodIndex):
724-
if not isinstance(rawarr, PeriodArray):
725-
rawarr = PeriodArray(rawarr, freq=self.freq)
726-
rawarr = PeriodIndex._simple_new(rawarr, name=self.name)
727-
return rawarr
728-
729719
def memory_usage(self, deep=False):
730720
result = super().memory_usage(deep=deep)
731721
if hasattr(self, "_cache") and "_int64index" in self._cache:

pandas/core/missing.py

Lines changed: 86 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ def clean_interp_method(method, **kwargs):
112112
"akima",
113113
"spline",
114114
"from_derivatives",
115+
"cubicspline",
115116
]
116117
if method in ("spline", "polynomial") and order is None:
117118
raise ValueError("You must specify the order of the spline or polynomial.")
@@ -293,6 +294,7 @@ def interpolate_1d(
293294
"piecewise_polynomial",
294295
"pchip",
295296
"akima",
297+
"cubicspline",
296298
]
297299

298300
if method in sp_methods:
@@ -341,14 +343,11 @@ def _interpolate_scipy_wrapper(
341343
x, new_x = x._values.astype("i8"), new_x.astype("i8")
342344

343345
if method == "pchip":
344-
try:
345-
alt_methods["pchip"] = interpolate.pchip_interpolate
346-
except AttributeError as err:
347-
raise ImportError(
348-
"Your version of Scipy does not support PCHIP interpolation."
349-
) from err
346+
alt_methods["pchip"] = interpolate.pchip_interpolate
350347
elif method == "akima":
351348
alt_methods["akima"] = _akima_interpolate
349+
elif method == "cubicspline":
350+
alt_methods["cubicspline"] = _cubicspline_interpolate
352351

353352
interp1d_methods = [
354353
"nearest",
@@ -406,7 +405,7 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False):
406405
der : int or list
407406
How many derivatives to extract; None for all potentially nonzero
408407
derivatives (that is a number equal to the number of points), or a
409-
list of derivatives to extract. This numberincludes the function
408+
list of derivatives to extract. This number includes the function
410409
value as 0th derivative.
411410
extrapolate : bool, optional
412411
Whether to extrapolate to out-of-bounds points based on first and last
@@ -446,8 +445,7 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):
446445
A 1-D array of real values. `yi`'s length along the interpolation
447446
axis must be equal to the length of `xi`. If N-D array, use axis
448447
parameter to select correct axis.
449-
x : scalar or array_like
450-
Of length M.
448+
x : scalar or array_like of length M.
451449
der : int or list, optional
452450
How many derivatives to extract; None for all potentially
453451
nonzero derivatives (that is a number equal to the number
@@ -478,6 +476,85 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):
478476
return [P(x, nu) for nu in der]
479477

480478

479+
def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolate=None):
480+
"""
481+
Convenience function for cubic spline data interpolator.
482+
483+
See `scipy.interpolate.CubicSpline` for details.
484+
485+
Parameters
486+
----------
487+
xi : array_like, shape (n,)
488+
1-d array containing values of the independent variable.
489+
Values must be real, finite and in strictly increasing order.
490+
yi : array_like
491+
Array containing values of the dependent variable. It can have
492+
arbitrary number of dimensions, but the length along ``axis``
493+
(see below) must match the length of ``xi``. Values must be finite.
494+
x : scalar or array_like, shape (m,)
495+
axis : int, optional
496+
Axis along which `yi` is assumed to be varying. Meaning that for
497+
``xi[i]`` the corresponding values are ``np.take(yi, i, axis=axis)``.
498+
Default is 0.
499+
bc_type : string or 2-tuple, optional
500+
Boundary condition type. Two additional equations, given by the
501+
boundary conditions, are required to determine all coefficients of
502+
polynomials on each segment [2]_.
503+
If `bc_type` is a string, then the specified condition will be applied
504+
at both ends of a spline. Available conditions are:
505+
* 'not-a-knot' (default): The first and second segment at a curve end
506+
are the same polynomial. It is a good default when there is no
507+
information on boundary conditions.
508+
* 'periodic': The interpolated function is assumed to be periodic
509+
of period ``xi[-1] - xi[0]``. The first and last value of `yi` must be
510+
identical: ``yi[0] == yi[-1]``. This boundary condition will result in
511+
``y'[0] == y'[-1]`` and ``y''[0] == y''[-1]``.
512+
* 'clamped': The first derivatives at the curve ends are zero. Assuming
513+
a 1D `yi`, ``bc_type=((1, 0.0), (1, 0.0))`` is the same condition.
514+
* 'natural': The second derivatives at the curve ends are zero. Assuming
515+
a 1D `yi`, ``bc_type=((2, 0.0), (2, 0.0))`` is the same condition.
516+
If `bc_type` is a 2-tuple, the first and the second value will be
517+
applied at the curve start and end respectively. The tuple values can
518+
be one of the previously mentioned strings (except 'periodic') or a
519+
tuple `(order, deriv_value)` that specifies arbitrary
520+
derivatives at curve ends:
521+
* `order`: the derivative order, 1 or 2.
522+
* `deriv_value`: array_like containing derivative values, shape must
523+
be the same as `yi`, excluding ``axis`` dimension. For example, if
524+
`yi` is 1D, then `deriv_value` must be a scalar. If `yi` is 3D with
525+
the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D
526+
and have the shape (n0, n1).
527+
extrapolate : {bool, 'periodic', None}, optional
528+
If bool, determines whether to extrapolate to out-of-bounds points
529+
based on first and last intervals, or to return NaNs. If 'periodic',
530+
periodic extrapolation is used. If None (default), ``extrapolate`` is
531+
set to 'periodic' for ``bc_type='periodic'`` and to True otherwise.
532+
533+
See Also
534+
--------
535+
scipy.interpolate.CubicHermiteSpline
536+
537+
Returns
538+
-------
539+
y : scalar or array_like
540+
The result, of shape (m,)
541+
542+
References
543+
----------
544+
.. [1] `Cubic Spline Interpolation
545+
<https://en.wikiversity.org/wiki/Cubic_Spline_Interpolation>`_
546+
on Wikiversity.
547+
.. [2] Carl de Boor, "A Practical Guide to Splines", Springer-Verlag, 1978.
548+
"""
549+
from scipy import interpolate
550+
551+
P = interpolate.CubicSpline(
552+
xi, yi, axis=axis, bc_type=bc_type, extrapolate=extrapolate
553+
)
554+
555+
return P(x)
556+
557+
481558
def interpolate_2d(
482559
values, method="pad", axis=0, limit=None, fill_value=None, dtype=None
483560
):

pandas/core/series.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -259,12 +259,8 @@ def __init__(
259259
# astype copies
260260
data = data.astype(dtype)
261261
else:
262-
# need to copy to avoid aliasing issues
262+
# GH#24096 we need to ensure the index remains immutable
263263
data = data._values.copy()
264-
if isinstance(data, ABCDatetimeIndex) and data.tz is not None:
265-
# GH#24096 need copy to be deep for datetime64tz case
266-
# TODO: See if we can avoid these copies
267-
data = data._values.copy(deep=True)
268264
copy = False
269265

270266
elif isinstance(data, np.ndarray):
@@ -280,6 +276,7 @@ def __init__(
280276
index = data.index
281277
else:
282278
data = data.reindex(index, copy=copy)
279+
copy = False
283280
data = data._mgr
284281
elif is_dict_like(data):
285282
data, index = self._init_dict(data, index, dtype)

0 commit comments

Comments
 (0)