Skip to content

PERF: block-wise ops for scalar and series #28774

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 44 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
9a617e3
REF: implement logical and comparison array ops
jbrockmendel Sep 12, 2019
3f414b1
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Sep 13, 2019
396b4a8
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Sep 13, 2019
56dff20
implement arithmetic_op
jbrockmendel Sep 13, 2019
77e3241
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Sep 13, 2019
148a8e8
add comments, types
jbrockmendel Sep 13, 2019
fcf9735
typo fixup
jbrockmendel Sep 13, 2019
fec86de
revert types
jbrockmendel Sep 14, 2019
2abdccb
add types
jbrockmendel Sep 17, 2019
121d783
docstrings
jbrockmendel Sep 17, 2019
5faa820
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Sep 17, 2019
267c7ca
ignore type
jbrockmendel Sep 17, 2019
0b5aa34
revert technically-incorrect type
jbrockmendel Sep 17, 2019
ad6da57
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Sep 18, 2019
8ced97b
REF: move na_op out
jbrockmendel Sep 18, 2019
6b9bce0
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Sep 18, 2019
b0d6263
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Sep 19, 2019
524a1fb
Checkpoint, 5 expressions tests failing
jbrockmendel Sep 20, 2019
504a12d
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Sep 20, 2019
e968517
revert
jbrockmendel Sep 20, 2019
709b1db
revert
jbrockmendel Sep 20, 2019
274188a
tests passing
jbrockmendel Sep 20, 2019
8f8f527
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Sep 20, 2019
7561f05
OK
jbrockmendel Sep 20, 2019
837f028
revert
jbrockmendel Sep 20, 2019
a6eada6
revert
jbrockmendel Sep 20, 2019
936be5f
revert
jbrockmendel Sep 20, 2019
5176a59
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Sep 23, 2019
26d1696
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Sep 23, 2019
01e4922
Fix tests by passing eval_kwargs
jbrockmendel Sep 23, 2019
16587e2
update tests
jbrockmendel Sep 23, 2019
b735d71
reenable check
jbrockmendel Sep 23, 2019
4dd8944
lint fixup
jbrockmendel Sep 23, 2019
829e72a
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Sep 27, 2019
7a80613
handling for Series with axis!=columns
jbrockmendel Sep 28, 2019
b3f88ef
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Oct 1, 2019
e73724a
Block-wise ops for both Series cases
jbrockmendel Oct 3, 2019
6d1655a
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Oct 3, 2019
266d1fa
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Oct 3, 2019
a8c7c64
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Oct 10, 2019
8d731b5
flake8 fixups
jbrockmendel Oct 10, 2019
82f3c45
flake8 fixups
jbrockmendel Oct 10, 2019
25b1670
blackify
jbrockmendel Oct 10, 2019
f851656
Merge branch 'master' of https://github.com/pandas-dev/pandas into ar…
jbrockmendel Oct 14, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 50 additions & 7 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,32 @@ class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin, ExtensionArray)
_generate_range
"""

@property
def ndim(self):
    """
    Number of dimensions, as reported by the backing ``_data`` array.
    """
    backing = self._data
    return backing.ndim

@property
def shape(self):
    """
    Shape tuple, as reported by the backing ``_data`` array.
    """
    backing = self._data
    return backing.shape

def __len__(self):
    """
    Return ``len()`` of the backing ``_data`` array.
    """
    backing = self._data
    return len(backing)

@property
def T(self):
    """
    Transpose, returned as a new instance of the same type.

    The new instance is constructed from the transposed ``_data`` and this
    array's ``dtype``; no ``freq`` is passed to the constructor.
    """
    # Note: we drop any freq
    transposed = self._data.T
    cls = type(self)
    return cls(transposed, dtype=self.dtype)

def reshape(self, *args, **kwargs):
    """
    Reshape the backing ``_data`` and wrap it in a new instance of this type.

    All positional and keyword arguments are forwarded unchanged to
    ``self._data.reshape``. The new instance is constructed with this
    array's ``dtype``; no ``freq`` is passed to the constructor.
    """
    # Note: we drop any freq
    reshaped = self._data.reshape(*args, **kwargs)
    cls = type(self)
    return cls(reshaped, dtype=self.dtype)

def ravel(self, *args, **kwargs):
    """
    Flatten the backing ``_data`` and wrap it in a new instance of this type.

    All positional and keyword arguments are forwarded unchanged to
    ``self._data.ravel``. The new instance is constructed with this
    array's ``dtype``; no ``freq`` is passed to the constructor.
    """
    # Note: we drop any freq
    flattened = self._data.ravel(*args, **kwargs)
    cls = type(self)
    return cls(flattened, dtype=self.dtype)

@property
def _box_func(self):
"""
Expand Down Expand Up @@ -396,9 +422,6 @@ def size(self) -> int:
"""The number of elements in this array."""
return np.prod(self.shape)

def __len__(self):
    """
    Return ``len()`` of the backing ``_data`` array.
    """
    backing = self._data
    return len(backing)

def __getitem__(self, key):
"""
This getitem defers to the underlying array, which by-definition can
Expand All @@ -416,7 +439,10 @@ def __getitem__(self, key):
getitem = self._data.__getitem__
if is_int:
val = getitem(key)
return self._box_func(val)
if lib.is_scalar(val):
return self._box_func(val)
else:
return type(self)(val, dtype=self.dtype)

if com.is_bool_indexer(key):
key = np.asarray(key, dtype=bool)
Expand Down Expand Up @@ -446,6 +472,7 @@ def __getitem__(self, key):
# even though it only has 1 dim by definition
if is_period:
return self._simple_new(result, dtype=self.dtype, freq=freq)
return self._simple_new(result, dtype=self.dtype)
return result

return self._simple_new(result, dtype=self.dtype, freq=freq)
Expand Down Expand Up @@ -1009,6 +1036,11 @@ def _add_delta_tdi(self, other):

other = TimedeltaArray._from_sequence(other)

if self.ndim == 2 and other.ndim == 1:
# we already know the lengths match
od = other._data[:, None]
other = type(other)(od)

self_i8 = self.asi8
other_i8 = other.asi8
new_values = checked_add_with_arr(
Expand All @@ -1032,7 +1064,7 @@ def _add_nat(self):

# GH#19124 pd.NaT is treated like a timedelta for both timedelta
# and datetime dtypes
result = np.zeros(len(self), dtype=np.int64)
result = np.zeros(self.shape, dtype=np.int64)
result.fill(iNaT)
return type(self)(result, dtype=self.dtype, freq=None)

Expand All @@ -1046,7 +1078,7 @@ def _sub_nat(self):
# For datetime64 dtypes by convention we treat NaT as a datetime, so
# this subtraction returns a timedelta64 dtype.
# For period dtype, timedelta64 is a close-enough return dtype.
result = np.zeros(len(self), dtype=np.int64)
result = np.zeros(self.shape, dtype=np.int64)
result.fill(iNaT)
return result.view("timedelta64[ns]")

Expand Down Expand Up @@ -1147,8 +1179,12 @@ def _addsub_offset_array(self, other, op):
PerformanceWarning,
)

# For EA self.astype('O') returns a numpy array, not an Index
if self.ndim == 2:
result = self.ravel()._addsub_offset_array(other.ravel(), op)
return result.reshape(self.shape) # FIXME: case with order mismatch

left = self.astype("O")
assert left.shape == other.shape

res_values = op(left, np.array(other))
kwargs = {}
Expand Down Expand Up @@ -1222,13 +1258,16 @@ def __add__(self, other):
elif is_offsetlike(other):
# Array/Index of DateOffset objects
result = self._addsub_offset_array(other, operator.add)
# FIXME: just do this for object-dtype
elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
# DatetimeIndex, ndarray[datetime64]
return self._add_datetime_arraylike(other)
elif is_integer_dtype(other):
if not is_period_dtype(self):
maybe_integer_op_deprecated(self)
result = self._addsub_int_array(other, operator.add)
elif is_object_dtype(other):
result = self._addsub_offset_array(other, operator.add)
else:
# Includes Categorical, other ExtensionArrays
# For PeriodDtype, if self is a TimedeltaArray and other is a
Expand Down Expand Up @@ -1279,6 +1318,7 @@ def __sub__(self, other):
elif is_offsetlike(other):
# Array/Index of DateOffset objects
result = self._addsub_offset_array(other, operator.sub)
# TODO: just do this for arbitrary object-dtype
elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
# DatetimeIndex, ndarray[datetime64]
result = self._sub_datetime_arraylike(other)
Expand All @@ -1289,6 +1329,9 @@ def __sub__(self, other):
if not is_period_dtype(self):
maybe_integer_op_deprecated(self)
result = self._addsub_int_array(other, operator.sub)
elif is_object_dtype(other):
result = self._addsub_offset_array(other, operator.sub)
# TODO: just do this for arbitrary object-dtype
else:
# Includes ExtensionArrays, float_dtype
return NotImplemented
Expand Down
15 changes: 14 additions & 1 deletion pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,17 @@
"""


def compat_2d(meth):
    """
    Decorate a method written for 1-D arrays so it also accepts N-D input.

    When ``self.ndim > 1`` the wrapped method is called on ``self.ravel()``
    and its result is reshaped back to ``self.shape``; otherwise the method
    is called on ``self`` unchanged.

    Parameters
    ----------
    meth : callable
        Method taking ``self`` as its first argument. For the N-D path its
        return value must provide a ``reshape`` method.

    Returns
    -------
    callable
        Wrapper with the same call signature as ``meth``.

    Notes
    -----
    ``ravel`` and ``reshape`` are both called with their default arguments,
    so the round trip relies on the two using matching element order.
    """
    # Function-scope import so this block does not depend on a module-level
    # ``import functools``.
    from functools import wraps

    # ``wraps`` carries over __doc__, __qualname__ and __module__ in addition
    # to __name__, so the decorated method keeps its documentation.
    @wraps(meth)
    def new_meth(self, *args, **kwargs):
        if self.ndim > 1:
            result = meth(self.ravel(), *args, **kwargs)
            return result.reshape(self.shape)
        return meth(self, *args, **kwargs)

    return new_meth


def tz_to_dtype(tz):
"""
Return a datetime64[ns] dtype appropriate for the given timezone.
Expand Down Expand Up @@ -361,7 +372,7 @@ def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False):
"ndarray, or Series or Index containing one of those."
)
raise ValueError(msg.format(type(values).__name__))
if values.ndim != 1:
if values.ndim not in [1, 2]:
raise ValueError("Only 1-dimensional input arrays are supported.")

if values.dtype == "i8":
Expand Down Expand Up @@ -818,13 +829,15 @@ def _sub_datetime_arraylike(self, other):
new_values[arr_mask] = iNaT
return new_values.view("timedelta64[ns]")

@compat_2d
def _add_offset(self, offset):
assert not isinstance(offset, Tick)
try:
if self.tz is not None:
values = self.tz_localize(None)
else:
values = self

result = offset.apply_index(values)
if self.tz is not None:
result = result.tz_localize(self.tz)
Expand Down
30 changes: 25 additions & 5 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,31 @@ def wrapper(self, other):
result[mask] = nat_result

return result
elif is_list_like(other):
try:
new_other = cls._from_sequence(other)
except TypeError:
result = np.empty(self.shape, dtype=bool)
result.fill(nat_result)
else:
return op(self, new_other)
elif other is NaT:
result = np.empty(len(self.asi8), dtype=bool)
result = np.empty(self.shape, dtype=bool)
result.fill(nat_result)
else:
other = Period(other, freq=self.freq)
result = ordinal_op(other.ordinal)
try:
other = Period(other, freq=self.freq)
except IncompatibleFrequency:
raise
except (ValueError, TypeError):
# TODO: use invalid_comparison
if op.__name__ in ["eq", "ne"]:
result = np.empty(self.shape, dtype=bool)
result.fill(nat_result)
else:
raise TypeError
else:
result = ordinal_op(other.ordinal)

if self._hasnans:
result[self._isnan] = nat_result
Expand Down Expand Up @@ -248,8 +267,9 @@ def _from_sequence(
if copy:
periods = periods.copy()

freq = freq or libperiod.extract_freq(periods)
ordinals = libperiod.extract_ordinals(periods, freq)
freq = freq or libperiod.extract_freq(periods.ravel())
ordinals1d = libperiod.extract_ordinals(periods.ravel(), freq)
ordinals = ordinals1d.reshape(periods.shape)
return cls(ordinals, freq=freq)

@classmethod
Expand Down
30 changes: 27 additions & 3 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False):
"ndarray, or Series or Index containing one of those."
)
raise ValueError(msg.format(type(values).__name__))
if values.ndim != 1:
if values.ndim not in [1, 2]:
raise ValueError("Only 1-dimensional input arrays are supported.")

if values.dtype == "i8":
Expand Down Expand Up @@ -272,6 +272,8 @@ def _from_sequence(cls, data, dtype=_TD_DTYPE, copy=False, freq=None, unit=None)

data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer)
if data.ndim != 1:
freq_infer = False # TODO: could put this in inferred_freq?

result = cls._simple_new(data, freq=freq)

Expand Down Expand Up @@ -598,6 +600,9 @@ def __truediv__(self, other):
# e.g. list, tuple
other = np.array(other)

if self.ndim == 2 and other.ndim == 1 and len(other) == len(self):
other = other[:, None]

if len(other) != len(self):
raise ValueError("Cannot divide vectors with unequal lengths")

Expand All @@ -610,7 +615,26 @@ def __truediv__(self, other):
# an object array or numeric-dtyped (if numpy does inference)
# will be returned. GH#23829
result = [self[n] / other[n] for n in range(len(self))]
result = np.array(result)
if all(isinstance(x, TimedeltaArray) for x in result):
if len(result) == 1:
result = result[0].reshape(1, -1)
return result
if any(isinstance(x, TimedeltaArray) for x in result):
raise NotImplementedError(result)

result = np.asarray(result)
if result.size and (
isinstance(result.flat[0], Timedelta) or result.flat[0] is NaT
):
# try to do inference, since we are no longer calling the
# Series constructor to do it for us. Only do it if we
# know we aren't incorrectly casting numerics.
try:
result1d = type(self)._from_sequence(result.ravel())
except (ValueError, TypeError):
pass
else:
result = result1d.reshape(result.shape)
return result

else:
Expand Down Expand Up @@ -1076,7 +1100,7 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
)

data = np.array(data, copy=copy)
if data.ndim != 1:
if data.ndim not in [1, 2]:
raise ValueError("Only 1-dimensional input arrays are supported.")

assert data.dtype == "m8[ns]", data
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1213,7 +1213,7 @@ def __getitem__(self, key):
elif result.ndim > 1:
# To support MPL which performs slicing with 2 dim
# even though it only has 1 dim by definition
assert isinstance(result, np.ndarray), result
result = result._data
return result
return type(self)(result, name=self.name)

Expand Down
Loading