PERF: DTA/TDA _simple_new disallow i8 values #40116

Merged: 1 commit, Mar 1, 2021

13 changes: 7 additions & 6 deletions pandas/core/arrays/datetimelike.py
@@ -465,15 +465,15 @@ def view(self, dtype: Optional[Dtype] = None) -> ArrayLike:
         dtype = pandas_dtype(dtype)
         if isinstance(dtype, (PeriodDtype, DatetimeTZDtype)):
             cls = dtype.construct_array_type()
-            return cls._simple_new(self.asi8, dtype=dtype)
+            return cls(self.asi8, dtype=dtype)
         elif dtype == "M8[ns]":
             from pandas.core.arrays import DatetimeArray
 
-            return DatetimeArray._simple_new(self.asi8, dtype=dtype)
+            return DatetimeArray(self.asi8, dtype=dtype)
         elif dtype == "m8[ns]":
             from pandas.core.arrays import TimedeltaArray
 
-            return TimedeltaArray._simple_new(self.asi8.view("m8[ns]"), dtype=dtype)
+            return TimedeltaArray(self.asi8, dtype=dtype)
         return self._ndarray.view(dtype=dtype)
 
     # ------------------------------------------------------------------
@@ -1102,10 +1102,10 @@ def _add_timedeltalike_scalar(self, other):
             return type(self)(new_values, dtype=self.dtype)
 
         inc = delta_to_nanoseconds(other)
-        new_values = checked_add_with_arr(self.asi8, inc, arr_mask=self._isnan).view(
-            "i8"
-        )
+        new_values = checked_add_with_arr(self.asi8, inc, arr_mask=self._isnan)
+        new_values = new_values.view("i8")
         new_values = self._maybe_mask_results(new_values)
+        new_values = new_values.view(self._ndarray.dtype)
 
         new_freq = None
         if isinstance(self.freq, Tick) or is_period_dtype(self.dtype):
@@ -1700,6 +1700,7 @@ def _round(self, freq, mode, ambiguous, nonexistent):
         nanos = to_offset(freq).nanos
         result = round_nsint64(values, mode, nanos)
         result = self._maybe_mask_results(result, fill_value=iNaT)
+        result = result.view(self._ndarray.dtype)
         return self._simple_new(result, dtype=self.dtype)
 
     @Appender((_round_doc + _round_example).format(op="round"))
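
Note (not part of the diff): the view() hunk above switches from _simple_new to the
public DatetimeArray/TimedeltaArray constructors because, after this PR, _simple_new
asserts that it already receives datetime64[ns]/timedelta64[ns] values, while the
public constructors still accept int64 nanosecond values and view them internally.
A minimal sketch of the two call paths, exercising internal pandas API of this era
purely for illustration (variable names are mine):

    import numpy as np
    from pandas.core.arrays import DatetimeArray

    i8values = np.arange(5, dtype="i8") * 24 * 3600 * 10 ** 9  # ns since epoch

    # Public constructor: still accepts i8 and reinterprets it as M8[ns].
    arr_from_i8 = DatetimeArray(i8values, dtype="M8[ns]")

    # _simple_new: the caller must now hand over datetime64[ns] values.
    arr_from_m8 = DatetimeArray._simple_new(i8values.view("M8[ns]"))

    assert (arr_from_i8 == arr_from_m8).all()
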
16 changes: 7 additions & 9 deletions pandas/core/arrays/datetimes.py
@@ -315,9 +315,7 @@ def _simple_new(
         cls, values, freq: Optional[BaseOffset] = None, dtype=DT64NS_DTYPE
     ) -> DatetimeArray:
         assert isinstance(values, np.ndarray)
-        if values.dtype != DT64NS_DTYPE:
-            assert values.dtype == "i8"
-            values = values.view(DT64NS_DTYPE)
+        assert values.dtype == DT64NS_DTYPE
 
         result = object.__new__(cls)
         result._ndarray = values
@@ -439,6 +437,7 @@ def _generate_range(
             values = np.array([x.value for x in xdr], dtype=np.int64)
 
             _tz = start.tz if start is not None else end.tz
+            values = values.view("M8[ns]")
             index = cls._simple_new(values, freq=freq, dtype=tz_to_dtype(_tz))
 
             if tz is not None and index.tz is None:
@@ -464,9 +463,8 @@
                 + start.value
             )
             dtype = tz_to_dtype(tz)
-            index = cls._simple_new(
-                arr.astype("M8[ns]", copy=False), freq=None, dtype=dtype
-            )
+            arr = arr.astype("M8[ns]", copy=False)
+            index = cls._simple_new(arr, freq=None, dtype=dtype)
 
         if not left_closed and len(index) and index[0] == start:
             # TODO: overload DatetimeLikeArrayMixin.__getitem__
@@ -476,7 +474,7 @@
             index = cast(DatetimeArray, index[:-1])
 
         dtype = tz_to_dtype(tz)
-        return cls._simple_new(index.asi8, freq=freq, dtype=dtype)
+        return cls._simple_new(index._ndarray, freq=freq, dtype=dtype)
 
     # -----------------------------------------------------------------
     # DatetimeLike Interface
@@ -710,7 +708,7 @@ def _add_offset(self, offset):
                 values = self.tz_localize(None)
             else:
                 values = self
-            result = offset._apply_array(values)
+            result = offset._apply_array(values).view("M8[ns]")
             result = DatetimeArray._simple_new(result)
             result = result.tz_localize(self.tz)
 
@@ -833,7 +831,7 @@ def tz_convert(self, tz):
 
         # No conversion since timestamps are all UTC to begin with
         dtype = tz_to_dtype(tz)
-        return self._simple_new(self.asi8, dtype=dtype, freq=self.freq)
+        return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq)
 
     @dtl.ravel_compat
     def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"):
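
The recurring migration pattern in this file is to call .view("M8[ns]") at the call
site before handing values to _simple_new. That view only reinterprets the same
8-byte integers as datetime64[ns]; no data is copied, so pushing it to the call
sites does not add meaningful cost. A small numpy-only sketch:

    import numpy as np

    i8 = np.arange(3, dtype="i8") * 86_400_000_000_000  # one day in nanoseconds
    m8 = i8.view("M8[ns]")              # same buffer, datetime64[ns] labels
    assert np.shares_memory(i8, m8)     # no copy was made
    assert (m8.view("i8") == i8).all()  # and the round trip is lossless
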
8 changes: 3 additions & 5 deletions pandas/core/arrays/timedeltas.py
@@ -229,13 +229,11 @@ def _simple_new(
     ) -> TimedeltaArray:
         assert dtype == TD64NS_DTYPE, dtype
         assert isinstance(values, np.ndarray), type(values)
-        if values.dtype != TD64NS_DTYPE:
-            assert values.dtype == "i8"
-            values = values.view(TD64NS_DTYPE)
+        assert values.dtype == TD64NS_DTYPE
 
         result = object.__new__(cls)
         result._ndarray = values
-        result._freq = to_offset(freq)
+        result._freq = freq
         result._dtype = TD64NS_DTYPE
         return result
 
@@ -317,7 +315,7 @@ def _generate_range(cls, start, end, periods, freq, closed=None):
         if not right_closed:
             index = index[:-1]
 
-        return cls._simple_new(index, freq=freq)
+        return cls._simple_new(index.view("m8[ns]"), freq=freq)
 
     # ----------------------------------------------------------------
     # DatetimeLike Interface
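
Besides the dtype assertion, the TimedeltaArray._simple_new hunk now stores freq as
given instead of passing it through to_offset, so callers of this internal
constructor are expected to supply an already-constructed offset (or None) rather
than a frequency string. A hedged sketch of what that implies, assuming the internal
API of this era:

    import numpy as np
    from pandas.core.arrays import TimedeltaArray
    from pandas.tseries.frequencies import to_offset

    td64 = (np.arange(3, dtype="i8") * 86_400_000_000_000).view("m8[ns]")

    # Pass a BaseOffset (or None); a bare "D" string is no longer normalized here.
    tda = TimedeltaArray._simple_new(td64, freq=to_offset("D"))
    assert tda.freq == to_offset("D")
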
3 changes: 2 additions & 1 deletion pandas/core/dtypes/cast.py
@@ -288,7 +288,8 @@ def maybe_downcast_to_dtype(
             i8values = result.astype("i8", copy=False)
             cls = dtype.construct_array_type()
             # equiv: DatetimeArray(i8values).tz_localize("UTC").tz_convert(dtype.tz)
-            result = cls._simple_new(i8values, dtype=dtype)
+            dt64values = i8values.view("M8[ns]")
+            result = cls._simple_new(dt64values, dtype=dtype)
         else:
             result = result.astype(dtype)
 
2 changes: 1 addition & 1 deletion pandas/core/groupby/ops.py
@@ -542,7 +542,7 @@ def _ea_wrap_cython_operation(
                 return res_values
 
             res_values = res_values.astype("i8", copy=False)
-            result = type(orig_values)._simple_new(res_values, dtype=orig_values.dtype)
+            result = type(orig_values)(res_values, dtype=orig_values.dtype)
             return result
 
         elif is_integer_dtype(values.dtype) or is_bool_dtype(values.dtype):

2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
@@ -820,7 +820,7 @@ def view(self, cls=None):
                 arr = self._data.view("i8")
                 idx_cls = self._dtype_to_subclass(dtype)
                 arr_cls = idx_cls._data_cls
-                arr = arr_cls._simple_new(self._data.view("i8"), dtype=dtype)
+                arr = arr_cls(self._data.view("i8"), dtype=dtype)
                 return idx_cls._simple_new(arr, name=self.name)
 
         result = self._data.view(cls)
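
For context, the Index.view hunk above is the path behind viewing an integer index
as a datetime-like one. A short public-API sketch of that direction, as pandas
behaved around the time of this PR:

    import numpy as np
    import pandas as pd

    i8_idx = pd.Index(np.arange(3, dtype="i8") * 86_400_000_000_000)
    dt_idx = i8_idx.view("M8[ns]")  # takes the arr_cls(...) branch shown above
    assert isinstance(dt_idx, pd.DatetimeIndex)
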
4 changes: 3 additions & 1 deletion pandas/core/indexes/datetimelike.py
@@ -100,6 +100,7 @@ def wrapper(left, right):
             join_index = orig_left._from_backing_data(join_index)
 
             return join_index, left_indexer, right_indexer
+
         return results
 
     return wrapper
@@ -645,7 +646,8 @@ def _get_join_freq(self, other):
 
     def _wrap_joined_index(self, joined: np.ndarray, other):
         assert other.dtype == self.dtype, (other.dtype, self.dtype)
-
+        assert joined.dtype == "i8" or joined.dtype == self.dtype, joined.dtype
+        joined = joined.view(self._data._ndarray.dtype)
         result = super()._wrap_joined_index(joined, other)
         result._data._freq = self._get_join_freq(other)
         return result

3 changes: 2 additions & 1 deletion pandas/core/nanops.py
@@ -1743,8 +1743,9 @@ def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
             result = result.view(orig_dtype)
         else:
             # DatetimeArray
+            # TODO: have this case go through a DTA method?
             result = type(values)._simple_new(  # type: ignore[attr-defined]
-                result, dtype=orig_dtype
+                result.view("M8[ns]"), dtype=orig_dtype
             )
 
     elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)):

20 changes: 9 additions & 11 deletions pandas/tests/arrays/test_datetimelike.py
@@ -85,12 +85,10 @@ def arr1d(self):
         arr = self.array_cls(data, freq="D")
         return arr
 
-    def test_compare_len1_raises(self):
+    def test_compare_len1_raises(self, arr1d):
         # make sure we raise when comparing with different lengths, specific
         # to the case where one has length-1, which numpy would broadcast
-        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
-
-        arr = self.array_cls._simple_new(data, freq="D")
+        arr = arr1d
         idx = self.index_cls(arr)
 
         with pytest.raises(ValueError, match="Lengths must match"):
@@ -153,7 +151,9 @@ def test_take(self):
         data = np.arange(100, dtype="i8") * 24 * 3600 * 10 ** 9
         np.random.shuffle(data)
 
-        arr = self.array_cls._simple_new(data, freq="D")
+        freq = None if self.array_cls is not PeriodArray else "D"
+
+        arr = self.array_cls(data, freq=freq)
         idx = self.index_cls._simple_new(arr)
 
         takers = [1, 4, 94]
@@ -172,7 +172,7 @@
     def test_take_fill_raises(self, fill_value):
         data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
 
-        arr = self.array_cls._simple_new(data, freq="D")
+        arr = self.array_cls(data, freq="D")
 
         msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. Got"
         with pytest.raises(TypeError, match=msg):
@@ -181,7 +181,7 @@ def test_take_fill_raises(self, fill_value):
     def test_take_fill(self):
         data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
 
-        arr = self.array_cls._simple_new(data, freq="D")
+        arr = self.array_cls(data, freq="D")
 
         result = arr.take([-1, 1], allow_fill=True, fill_value=None)
         assert result[0] is pd.NaT
@@ -202,10 +202,8 @@ def test_take_fill_str(self, arr1d):
         with pytest.raises(TypeError, match=msg):
             arr1d.take([-1, 1], allow_fill=True, fill_value="foo")
 
-    def test_concat_same_type(self):
-        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
-
-        arr = self.array_cls._simple_new(data, freq="D")
+    def test_concat_same_type(self, arr1d):
+        arr = arr1d
         idx = self.index_cls(arr)
         idx = idx.insert(0, pd.NaT)
         arr = self.array_cls(idx)

4 changes: 2 additions & 2 deletions pandas/tests/indexes/test_common.py
@@ -175,7 +175,7 @@ def test_get_unique_index(self, index_flat):
             vals = index[[0] * 5]._data
             vals[0] = pd.NaT
         elif needs_i8_conversion(index.dtype):
-            vals = index.asi8[[0] * 5]
+            vals = index._data._ndarray[[0] * 5]
             vals[0] = iNaT
         else:
             vals = index.values[[0] * 5]
@@ -184,7 +184,7 @@
         vals_unique = vals[:2]
         if index.dtype.kind in ["m", "M"]:
             # i.e. needs_i8_conversion but not period_dtype, as above
-            vals = type(index._data)._simple_new(vals, dtype=index.dtype)
+            vals = type(index._data)(vals, dtype=index.dtype)
             vals_unique = type(index._data)._simple_new(vals_unique, dtype=index.dtype)
         idx_nan = index._shallow_copy(vals)
         idx_unique_nan = index._shallow_copy(vals_unique)