From bab357114b8de7f911b9f54667ced67c9a0ca0fc Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 9 Sep 2024 20:06:54 -0700 Subject: [PATCH 01/10] Implements `BinaryElementwiseFunc._inplace_op` method This method permits casting behavior equivalent to `"same_kind"` when using in-place operators by introducing the `_inplace_op` method Expands this to `__imatmul__` as well through use of the already-implemented `dtype` keyword --- dpctl/tensor/_elementwise_common.py | 130 +++++++++++++++++++++++++++- dpctl/tensor/_type_utils.py | 16 ++++ dpctl/tensor/_usmarray.pyx | 26 +++--- 3 files changed, 157 insertions(+), 15 deletions(-) diff --git a/dpctl/tensor/_elementwise_common.py b/dpctl/tensor/_elementwise_common.py index 6b38444902..991c7ca303 100644 --- a/dpctl/tensor/_elementwise_common.py +++ b/dpctl/tensor/_elementwise_common.py @@ -37,6 +37,7 @@ _all_data_types, _find_buf_dtype, _find_buf_dtype2, + _find_buf_dtype_in_place_op, _resolve_weak_types, _to_device_supported_dtype, ) @@ -213,7 +214,7 @@ def __call__(self, x, /, *, out=None, order="K"): if res_dt != out.dtype: raise ValueError( - f"Output array of type {res_dt} is needed," + f"Output array of type {res_dt} is needed, " f" got {out.dtype}" ) @@ -650,7 +651,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): if res_dt != out.dtype: raise ValueError( - f"Output array of type {res_dt} is needed," + f"Output array of type {res_dt} is needed, " f"got {out.dtype}" ) @@ -927,3 +928,128 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): ) _manager.add_event_pair(ht_, bf_ev) return out + + def _inplace_op(self, o1, o2): + if not isinstance(o1, dpt.usm_ndarray): + raise TypeError( + "Expected first argument to be " + f"dpctl.tensor.usm_ndarray, got {type(o1)}" + ) + if not o1.flags.writable: + raise ValueError("provided left-hand side array is read-only") + q1, o1_usm_type = o1.sycl_queue, o1.usm_type + q2, o2_usm_type = _get_queue_usm_type(o2) + if q2 is None: + exec_q = q1 + res_usm_type = o1_usm_type + else: + exec_q = dpctl.utils.get_execution_queue((q1, q2)) + if exec_q is None: + raise ExecutionPlacementError( + "Execution placement can not be unambiguously inferred " + "from input arguments." + ) + res_usm_type = dpctl.utils.get_coerced_usm_type( + ( + o1_usm_type, + o2_usm_type, + ) + ) + dpctl.utils.validate_usm_type(res_usm_type, allow_none=False) + o1_shape = o1.shape + o2_shape = _get_shape(o2) + if not isinstance(o2_shape, (tuple, list)): + raise TypeError( + "Shape of second argument can not be inferred. " + "Expected list or tuple." + ) + try: + res_shape = _broadcast_shape_impl( + [ + o1_shape, + o2_shape, + ] + ) + except ValueError: + raise ValueError( + "operands could not be broadcast together with shapes " + f"{o1_shape} and {o2_shape}" + ) + if res_shape != o1_shape: + raise ValueError("") + sycl_dev = exec_q.sycl_device + o1_dtype = o1.dtype + o2_dtype = _get_dtype(o2, sycl_dev) + if not _validate_dtype(o2_dtype): + raise ValueError("Operand has an unsupported data type") + + o1_dtype, o2_dtype = self.weak_type_resolver_( + o1_dtype, o2_dtype, sycl_dev + ) + + buf_dt, res_dt = _find_buf_dtype_in_place_op( + o1_dtype, + o2_dtype, + self.result_type_resolver_fn_, + sycl_dev, + ) + + if res_dt is None: + raise ValueError( + f"function '{self.name_}' does not support input types " + f"({o1_dtype}, {o2_dtype}), " + "and the inputs could not be safely coerced to any " + "supported types according to the casting rule ''same_kind''." + ) + + if res_dt != o1_dtype: + raise ValueError( + f"Output array of type {res_dt} is needed, " f"got {o1_dtype}" + ) + + _manager = SequentialOrderManager[exec_q] + if isinstance(o2, dpt.usm_ndarray): + src2 = o2 + if ( + ti._array_overlap(o2, o1) + and not ti._same_logical_tensors(o2, o1) + and buf_dt is None + ): + buf_dt = o2_dtype + else: + src2 = dpt.asarray(o2, dtype=o2_dtype, sycl_queue=exec_q) + if buf_dt is None: + if src2.shape != res_shape: + src2 = dpt.broadcast_to(src2, res_shape) + dep_evs = _manager.submitted_events + ht_, comp_ev = self.binary_inplace_fn_( + lhs=o1, + rhs=src2, + sycl_queue=exec_q, + depends=dep_evs, + ) + _manager.add_event_pair(ht_, comp_ev) + else: + buf = dpt.empty_like(src2, dtype=buf_dt) + dep_evs = _manager.submitted_events + ( + ht_copy_ev, + copy_ev, + ) = ti._copy_usm_ndarray_into_usm_ndarray( + src=src2, + dst=buf, + sycl_queue=exec_q, + depends=dep_evs, + ) + _manager.add_event_pair(ht_copy_ev, copy_ev) + + buf = dpt.broadcast_to(buf, res_shape) + ht_, bf_ev = self.binary_inplace_fn_( + lhs=o1, + rhs=buf, + sycl_queue=exec_q, + depends=[copy_ev], + ) + _manager.add_event_pair(ht_, bf_ev) + + return o1 diff --git a/dpctl/tensor/_type_utils.py b/dpctl/tensor/_type_utils.py index 890af46339..5defd154df 100644 --- a/dpctl/tensor/_type_utils.py +++ b/dpctl/tensor/_type_utils.py @@ -277,6 +277,21 @@ def _find_buf_dtype2(arg1_dtype, arg2_dtype, query_fn, sycl_dev, acceptance_fn): return None, None, None +def _find_buf_dtype_in_place_op(arg1_dtype, arg2_dtype, query_fn, sycl_dev): + res_dt = query_fn(arg1_dtype, arg2_dtype) + if res_dt: + return None, res_dt + + _fp16 = sycl_dev.has_aspect_fp16 + _fp64 = sycl_dev.has_aspect_fp64 + if _can_cast(arg2_dtype, arg1_dtype, _fp16, _fp64, casting="same_kind"): + res_dt = query_fn(arg1_dtype, arg1_dtype) + if res_dt: + return arg1_dtype, res_dt + + return None, None + + class WeakBooleanType: "Python type representing type of Python boolean objects" @@ -959,4 +974,5 @@ def _default_accumulation_dtype_fp_types(inp_dt, q): "WeakComplexType", "_default_accumulation_dtype", "_default_accumulation_dtype_fp_types", + "_find_buf_dtype_in_place_op", ] diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index a46f3f763f..e879424036 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -1508,43 +1508,43 @@ cdef class usm_ndarray: return dpctl.tensor.bitwise_xor(other, self) def __iadd__(self, other): - return dpctl.tensor.add(self, other, out=self) + return dpctl.tensor.add._inplace_op(self, other) def __iand__(self, other): - return dpctl.tensor.bitwise_and(self, other, out=self) + return dpctl.tensor.bitwise_and._inplace_op(self, other) def __ifloordiv__(self, other): - return dpctl.tensor.floor_divide(self, other, out=self) + return dpctl.tensor.floor_divide._inplace_op(self, other) def __ilshift__(self, other): - return dpctl.tensor.bitwise_left_shift(self, other, out=self) + return dpctl.tensor.bitwise_left_shift._inplace_op(self, other) def __imatmul__(self, other): - return dpctl.tensor.matmul(self, other, out=self) + return dpctl.tensor.matmul(self, other, out=self, dtype=self.dtype) def __imod__(self, other): - return dpctl.tensor.remainder(self, other, out=self) + return dpctl.tensor.remainder._inplace_op(self, other) def __imul__(self, other): - return dpctl.tensor.multiply(self, other, out=self) + return dpctl.tensor.multiply._inplace_op(self, other) def __ior__(self, other): - return dpctl.tensor.bitwise_or(self, other, out=self) + return dpctl.tensor.bitwise_or._inplace_op(self, other) def __ipow__(self, other): - return dpctl.tensor.pow(self, other, out=self) + return dpctl.tensor.pow._inplace_op(self, other) def __irshift__(self, other): - return dpctl.tensor.bitwise_right_shift(self, other, out=self) + return dpctl.tensor.bitwise_right_shift._inplace_op(self, other) def __isub__(self, other): - return dpctl.tensor.subtract(self, other, out=self) + return dpctl.tensor.subtract._inplace_op(self, other) def __itruediv__(self, other): - return dpctl.tensor.divide(self, other, out=self) + return dpctl.tensor.divide._inplace_op(self, other) def __ixor__(self, other): - return dpctl.tensor.bitwise_xor(self, other, out=self) + return dpctl.tensor.bitwise_xor._inplace_op(self, other) def __str__(self): return usm_ndarray_str(self) From 79208c8dcedc78ca27d8289ec51d1dafcf4574a7 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 9 Sep 2024 23:48:36 -0700 Subject: [PATCH 02/10] Adjusts tests for in-place element-wise operations to account for `"same_kind"` casting --- dpctl/tests/elementwise/test_add.py | 2 +- dpctl/tests/elementwise/test_bitwise_and.py | 2 +- dpctl/tests/elementwise/test_bitwise_left_shift.py | 2 +- dpctl/tests/elementwise/test_bitwise_or.py | 2 +- dpctl/tests/elementwise/test_bitwise_xor.py | 2 +- dpctl/tests/elementwise/test_divide.py | 4 ++-- dpctl/tests/elementwise/test_floor_divide.py | 2 +- dpctl/tests/elementwise/test_multiply.py | 2 +- dpctl/tests/elementwise/test_pow.py | 2 +- dpctl/tests/elementwise/test_remainder.py | 2 +- dpctl/tests/elementwise/test_subtract.py | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) diff --git a/dpctl/tests/elementwise/test_add.py b/dpctl/tests/elementwise/test_add.py index 9edc8399e8..c02a2b126a 100644 --- a/dpctl/tests/elementwise/test_add.py +++ b/dpctl/tests/elementwise/test_add.py @@ -358,7 +358,7 @@ def test_add_inplace_dtype_matrix(op1_dtype, op2_dtype): dev = q.sycl_device _fp16 = dev.has_aspect_fp16 _fp64 = dev.has_aspect_fp64 - if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64, casting="same_kind"): ar1 += ar2 assert ( dpt.asnumpy(ar1) == np.full(ar1.shape, 2, dtype=ar1.dtype) diff --git a/dpctl/tests/elementwise/test_bitwise_and.py b/dpctl/tests/elementwise/test_bitwise_and.py index 767323eb6e..97d95e27f8 100644 --- a/dpctl/tests/elementwise/test_bitwise_and.py +++ b/dpctl/tests/elementwise/test_bitwise_and.py @@ -114,7 +114,7 @@ def test_bitwise_and_inplace_dtype_matrix(op1_dtype, op2_dtype): dev = q.sycl_device _fp16 = dev.has_aspect_fp16 _fp64 = dev.has_aspect_fp64 - if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64, casting="same_kind"): ar1 &= ar2 assert dpt.all(ar1 == 1) diff --git a/dpctl/tests/elementwise/test_bitwise_left_shift.py b/dpctl/tests/elementwise/test_bitwise_left_shift.py index e2e3538176..0d1eab4575 100644 --- a/dpctl/tests/elementwise/test_bitwise_left_shift.py +++ b/dpctl/tests/elementwise/test_bitwise_left_shift.py @@ -122,7 +122,7 @@ def test_bitwise_left_shift_inplace_dtype_matrix(op1_dtype, op2_dtype): dev = q.sycl_device _fp16 = dev.has_aspect_fp16 _fp64 = dev.has_aspect_fp64 - if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64, casting="same_kind"): ar1 <<= ar2 assert dpt.all(ar1 == 2) diff --git a/dpctl/tests/elementwise/test_bitwise_or.py b/dpctl/tests/elementwise/test_bitwise_or.py index 2cfa2af6f6..c854512436 100644 --- a/dpctl/tests/elementwise/test_bitwise_or.py +++ b/dpctl/tests/elementwise/test_bitwise_or.py @@ -114,7 +114,7 @@ def test_bitwise_or_inplace_dtype_matrix(op1_dtype, op2_dtype): dev = q.sycl_device _fp16 = dev.has_aspect_fp16 _fp64 = dev.has_aspect_fp64 - if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64, casting="same_kind"): ar1 |= ar2 assert dpt.all(ar1 == 1) diff --git a/dpctl/tests/elementwise/test_bitwise_xor.py b/dpctl/tests/elementwise/test_bitwise_xor.py index eca4da853f..d64bfa0186 100644 --- a/dpctl/tests/elementwise/test_bitwise_xor.py +++ b/dpctl/tests/elementwise/test_bitwise_xor.py @@ -114,7 +114,7 @@ def test_bitwise_xor_inplace_dtype_matrix(op1_dtype, op2_dtype): dev = q.sycl_device _fp16 = dev.has_aspect_fp16 _fp64 = dev.has_aspect_fp64 - if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64, casting="same_kind"): ar1 ^= ar2 assert dpt.all(ar1 == 0) diff --git a/dpctl/tests/elementwise/test_divide.py b/dpctl/tests/elementwise/test_divide.py index d6b7d15201..16d73a040c 100644 --- a/dpctl/tests/elementwise/test_divide.py +++ b/dpctl/tests/elementwise/test_divide.py @@ -226,7 +226,7 @@ def test_divide_inplace_dtype_matrix(op1_dtype, op2_dtype): _fp64 = dev.has_aspect_fp64 # out array only valid if it is inexact if ( - _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64) + _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64, casting="same_kind") and dpt.dtype(op1_dtype).kind in "fc" ): ar1 /= ar2 @@ -276,7 +276,7 @@ def test_divide_gh_1711(): # don't test for overflowing double as Python won't cast -# an Python integer of that size to a Python float +# a Python integer of that size to a Python float @pytest.mark.parametrize("fp_dt", [dpt.float16, dpt.float32]) def test_divide_by_scalar_overflow(fp_dt): q = get_queue_or_skip() diff --git a/dpctl/tests/elementwise/test_floor_divide.py b/dpctl/tests/elementwise/test_floor_divide.py index c2f3415642..e96c95d6cd 100644 --- a/dpctl/tests/elementwise/test_floor_divide.py +++ b/dpctl/tests/elementwise/test_floor_divide.py @@ -290,7 +290,7 @@ def test_floor_divide_inplace_dtype_matrix(op1_dtype, op2_dtype): _fp16 = dev.has_aspect_fp16 _fp64 = dev.has_aspect_fp64 # out array only valid if it is inexact - if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64, casting="same_kind"): ar1 //= ar2 assert dpt.all(ar1 == 1) diff --git a/dpctl/tests/elementwise/test_multiply.py b/dpctl/tests/elementwise/test_multiply.py index e15bd367b0..82c4dcd04d 100644 --- a/dpctl/tests/elementwise/test_multiply.py +++ b/dpctl/tests/elementwise/test_multiply.py @@ -205,7 +205,7 @@ def test_multiply_inplace_dtype_matrix(op1_dtype, op2_dtype): dev = q.sycl_device _fp16 = dev.has_aspect_fp16 _fp64 = dev.has_aspect_fp64 - if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64, casting="same_kind"): ar1 *= ar2 assert ( dpt.asnumpy(ar1) == np.full(ar1.shape, 1, dtype=ar1.dtype) diff --git a/dpctl/tests/elementwise/test_pow.py b/dpctl/tests/elementwise/test_pow.py index e298ed2347..0e8692df9a 100644 --- a/dpctl/tests/elementwise/test_pow.py +++ b/dpctl/tests/elementwise/test_pow.py @@ -183,7 +183,7 @@ def test_pow_inplace_dtype_matrix(op1_dtype, op2_dtype): dev = q.sycl_device _fp16 = dev.has_aspect_fp16 _fp64 = dev.has_aspect_fp64 - if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64, casting="same_kind"): ar1 **= ar2 assert ( dpt.asnumpy(ar1) == np.full(ar1.shape, 1, dtype=ar1.dtype) diff --git a/dpctl/tests/elementwise/test_remainder.py b/dpctl/tests/elementwise/test_remainder.py index ce1711074c..638384de1f 100644 --- a/dpctl/tests/elementwise/test_remainder.py +++ b/dpctl/tests/elementwise/test_remainder.py @@ -235,7 +235,7 @@ def test_remainder_inplace_dtype_matrix(op1_dtype, op2_dtype): dev = q.sycl_device _fp16 = dev.has_aspect_fp16 _fp64 = dev.has_aspect_fp64 - if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64, casting="same_kind"): ar1 %= ar2 assert dpt.all(ar1 == dpt.zeros(ar1.shape, dtype=ar1.dtype)) diff --git a/dpctl/tests/elementwise/test_subtract.py b/dpctl/tests/elementwise/test_subtract.py index 71647a7306..16c05a9dbe 100644 --- a/dpctl/tests/elementwise/test_subtract.py +++ b/dpctl/tests/elementwise/test_subtract.py @@ -208,7 +208,7 @@ def test_subtract_inplace_dtype_matrix(op1_dtype, op2_dtype): dev = q.sycl_device _fp16 = dev.has_aspect_fp16 _fp64 = dev.has_aspect_fp64 - if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64, casting="same_kind"): ar1 -= ar2 assert (dpt.asnumpy(ar1) == np.zeros(ar1.shape, dtype=ar1.dtype)).all() From f13c02d79e8323df2c57a43a57b84657ee395492 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 10 Sep 2024 10:52:55 -0700 Subject: [PATCH 03/10] `BinaryElementwiseFunc._inplace_op` now checks if a kernel is available Raises `ValueError` otherwise --- dpctl/tensor/_elementwise_common.py | 232 ++++++++++++++-------------- 1 file changed, 120 insertions(+), 112 deletions(-) diff --git a/dpctl/tensor/_elementwise_common.py b/dpctl/tensor/_elementwise_common.py index 991c7ca303..8c3ea49c58 100644 --- a/dpctl/tensor/_elementwise_common.py +++ b/dpctl/tensor/_elementwise_common.py @@ -930,126 +930,134 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): return out def _inplace_op(self, o1, o2): - if not isinstance(o1, dpt.usm_ndarray): - raise TypeError( - "Expected first argument to be " - f"dpctl.tensor.usm_ndarray, got {type(o1)}" - ) - if not o1.flags.writable: - raise ValueError("provided left-hand side array is read-only") - q1, o1_usm_type = o1.sycl_queue, o1.usm_type - q2, o2_usm_type = _get_queue_usm_type(o2) - if q2 is None: - exec_q = q1 - res_usm_type = o1_usm_type - else: - exec_q = dpctl.utils.get_execution_queue((q1, q2)) - if exec_q is None: - raise ExecutionPlacementError( - "Execution placement can not be unambiguously inferred " - "from input arguments." + if self.binary_inplace_fn_ is not None: + if not isinstance(o1, dpt.usm_ndarray): + raise TypeError( + "Expected first argument to be " + f"dpctl.tensor.usm_ndarray, got {type(o1)}" ) - res_usm_type = dpctl.utils.get_coerced_usm_type( - ( - o1_usm_type, - o2_usm_type, + if not o1.flags.writable: + raise ValueError("provided left-hand side array is read-only") + q1, o1_usm_type = o1.sycl_queue, o1.usm_type + q2, o2_usm_type = _get_queue_usm_type(o2) + if q2 is None: + exec_q = q1 + res_usm_type = o1_usm_type + else: + exec_q = dpctl.utils.get_execution_queue((q1, q2)) + if exec_q is None: + raise ExecutionPlacementError( + "Execution placement can not be unambiguously inferred " + "from input arguments." + ) + res_usm_type = dpctl.utils.get_coerced_usm_type( + ( + o1_usm_type, + o2_usm_type, + ) ) + dpctl.utils.validate_usm_type(res_usm_type, allow_none=False) + o1_shape = o1.shape + o2_shape = _get_shape(o2) + if not isinstance(o2_shape, (tuple, list)): + raise TypeError( + "Shape of second argument can not be inferred. " + "Expected list or tuple." + ) + try: + res_shape = _broadcast_shape_impl( + [ + o1_shape, + o2_shape, + ] + ) + except ValueError: + raise ValueError( + "operands could not be broadcast together with shapes " + f"{o1_shape} and {o2_shape}" + ) + if res_shape != o1_shape: + raise ValueError("") + sycl_dev = exec_q.sycl_device + o1_dtype = o1.dtype + o2_dtype = _get_dtype(o2, sycl_dev) + if not _validate_dtype(o2_dtype): + raise ValueError("Operand has an unsupported data type") + + o1_dtype, o2_dtype = self.weak_type_resolver_( + o1_dtype, o2_dtype, sycl_dev ) - dpctl.utils.validate_usm_type(res_usm_type, allow_none=False) - o1_shape = o1.shape - o2_shape = _get_shape(o2) - if not isinstance(o2_shape, (tuple, list)): - raise TypeError( - "Shape of second argument can not be inferred. " - "Expected list or tuple." - ) - try: - res_shape = _broadcast_shape_impl( - [ - o1_shape, - o2_shape, - ] - ) - except ValueError: - raise ValueError( - "operands could not be broadcast together with shapes " - f"{o1_shape} and {o2_shape}" + + buf_dt, res_dt = _find_buf_dtype_in_place_op( + o1_dtype, + o2_dtype, + self.result_type_resolver_fn_, + sycl_dev, ) - if res_shape != o1_shape: - raise ValueError("") - sycl_dev = exec_q.sycl_device - o1_dtype = o1.dtype - o2_dtype = _get_dtype(o2, sycl_dev) - if not _validate_dtype(o2_dtype): - raise ValueError("Operand has an unsupported data type") - o1_dtype, o2_dtype = self.weak_type_resolver_( - o1_dtype, o2_dtype, sycl_dev - ) + if res_dt is None: + raise ValueError( + f"function '{self.name_}' does not support input types " + f"({o1_dtype}, {o2_dtype}), " + "and the inputs could not be safely coerced to any " + "supported types according to the casting rule " + "''same_kind''." + ) - buf_dt, res_dt = _find_buf_dtype_in_place_op( - o1_dtype, - o2_dtype, - self.result_type_resolver_fn_, - sycl_dev, - ) + if res_dt != o1_dtype: + raise ValueError( + f"Output array of type {res_dt} is needed, " + f"got {o1_dtype}" + ) - if res_dt is None: - raise ValueError( - f"function '{self.name_}' does not support input types " - f"({o1_dtype}, {o2_dtype}), " - "and the inputs could not be safely coerced to any " - "supported types according to the casting rule ''same_kind''." - ) + _manager = SequentialOrderManager[exec_q] + if isinstance(o2, dpt.usm_ndarray): + src2 = o2 + if ( + ti._array_overlap(o2, o1) + and not ti._same_logical_tensors(o2, o1) + and buf_dt is None + ): + buf_dt = o2_dtype + else: + src2 = dpt.asarray(o2, dtype=o2_dtype, sycl_queue=exec_q) + if buf_dt is None: + if src2.shape != res_shape: + src2 = dpt.broadcast_to(src2, res_shape) + dep_evs = _manager.submitted_events + ht_, comp_ev = self.binary_inplace_fn_( + lhs=o1, + rhs=src2, + sycl_queue=exec_q, + depends=dep_evs, + ) + _manager.add_event_pair(ht_, comp_ev) + else: + buf = dpt.empty_like(src2, dtype=buf_dt) + dep_evs = _manager.submitted_events + ( + ht_copy_ev, + copy_ev, + ) = ti._copy_usm_ndarray_into_usm_ndarray( + src=src2, + dst=buf, + sycl_queue=exec_q, + depends=dep_evs, + ) + _manager.add_event_pair(ht_copy_ev, copy_ev) - if res_dt != o1_dtype: - raise ValueError( - f"Output array of type {res_dt} is needed, " f"got {o1_dtype}" - ) + buf = dpt.broadcast_to(buf, res_shape) + ht_, bf_ev = self.binary_inplace_fn_( + lhs=o1, + rhs=buf, + sycl_queue=exec_q, + depends=[copy_ev], + ) + _manager.add_event_pair(ht_, bf_ev) - _manager = SequentialOrderManager[exec_q] - if isinstance(o2, dpt.usm_ndarray): - src2 = o2 - if ( - ti._array_overlap(o2, o1) - and not ti._same_logical_tensors(o2, o1) - and buf_dt is None - ): - buf_dt = o2_dtype - else: - src2 = dpt.asarray(o2, dtype=o2_dtype, sycl_queue=exec_q) - if buf_dt is None: - if src2.shape != res_shape: - src2 = dpt.broadcast_to(src2, res_shape) - dep_evs = _manager.submitted_events - ht_, comp_ev = self.binary_inplace_fn_( - lhs=o1, - rhs=src2, - sycl_queue=exec_q, - depends=dep_evs, - ) - _manager.add_event_pair(ht_, comp_ev) + return o1 else: - buf = dpt.empty_like(src2, dtype=buf_dt) - dep_evs = _manager.submitted_events - ( - ht_copy_ev, - copy_ev, - ) = ti._copy_usm_ndarray_into_usm_ndarray( - src=src2, - dst=buf, - sycl_queue=exec_q, - depends=dep_evs, - ) - _manager.add_event_pair(ht_copy_ev, copy_ev) - - buf = dpt.broadcast_to(buf, res_shape) - ht_, bf_ev = self.binary_inplace_fn_( - lhs=o1, - rhs=buf, - sycl_queue=exec_q, - depends=[copy_ev], + raise ValueError( + "binary function does not have a dedicated in-place " + "implementation" ) - _manager.add_event_pair(ht_, bf_ev) - - return o1 From d7fba40375d68f8d89a2e375e95e256960fbd35e Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 10 Sep 2024 11:11:22 -0700 Subject: [PATCH 04/10] Adds message to `ValueError` for cast of binary in-place operator where both inputs are broadcast to a new shape --- dpctl/tensor/_elementwise_common.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/_elementwise_common.py b/dpctl/tensor/_elementwise_common.py index 8c3ea49c58..87f4959de2 100644 --- a/dpctl/tensor/_elementwise_common.py +++ b/dpctl/tensor/_elementwise_common.py @@ -976,8 +976,14 @@ def _inplace_op(self, o1, o2): "operands could not be broadcast together with shapes " f"{o1_shape} and {o2_shape}" ) + if res_shape != o1_shape: - raise ValueError("") + raise ValueError( + "The shape of the non-broadcastable left-hand " + f"side {o1_shape} is inconsistent with the " + f"broadcast shape {res_shape}." + ) + sycl_dev = exec_q.sycl_device o1_dtype = o1.dtype o2_dtype = _get_dtype(o2, sycl_dev) From 7f59819c777da7b882306cd3f41cbf5bbe772aa4 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 10 Sep 2024 15:11:53 -0700 Subject: [PATCH 05/10] Add more tests for element-wise in-place operators Also clean up and make some tests for in-place operators more efficient --- dpctl/tests/elementwise/test_add.py | 68 +++++++++++++++++-- dpctl/tests/elementwise/test_bitwise_and.py | 16 ----- .../elementwise/test_bitwise_left_shift.py | 16 ----- .../elementwise/test_elementwise_classes.py | 11 ++- dpctl/tests/elementwise/test_floor_divide.py | 15 ---- 5 files changed, 71 insertions(+), 55 deletions(-) diff --git a/dpctl/tests/elementwise/test_add.py b/dpctl/tests/elementwise/test_add.py index c02a2b126a..e7838005b0 100644 --- a/dpctl/tests/elementwise/test_add.py +++ b/dpctl/tests/elementwise/test_add.py @@ -373,9 +373,25 @@ def test_add_inplace_dtype_matrix(op1_dtype, op2_dtype): else: with pytest.raises(ValueError): ar1 += ar2 + + ar1 = dpt.ones(sz, dtype=op1_dtype) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype) + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + dpt.add(ar1, ar2, out=ar1) + assert ( + dpt.asnumpy(ar1) == np.full(ar1.shape, 2, dtype=ar1.dtype) + ).all() + + ar3 = dpt.ones(sz, dtype=op1_dtype)[::-1] + ar4 = dpt.ones(2 * sz, dtype=op2_dtype)[::2] + dpt.add(ar3, ar4, out=ar3) + assert ( + dpt.asnumpy(ar3) == np.full(ar3.shape, 2, dtype=ar3.dtype) + ).all() + else: + with pytest.raises(ValueError): dpt.add(ar1, ar2, out=ar1) - # out is second arg ar1 = dpt.ones(sz, dtype=op1_dtype) ar2 = dpt.ones_like(ar1, dtype=op2_dtype) if _can_cast(ar1.dtype, ar2.dtype, _fp16, _fp64): @@ -401,7 +417,7 @@ def test_add_inplace_broadcasting(): m = dpt.ones((100, 5), dtype="i4") v = dpt.arange(5, dtype="i4") - m += v + dpt.add(m, v, out=m) assert (dpt.asnumpy(m) == np.arange(1, 6, dtype="i4")[np.newaxis, :]).all() # check case where second arg is out @@ -411,6 +427,26 @@ def test_add_inplace_broadcasting(): ).all() +def test_add_inplace_operator_broadcasting(): + get_queue_or_skip() + + m = dpt.ones((100, 5), dtype="i4") + v = dpt.arange(5, dtype="i4") + + m += v + assert (dpt.asnumpy(m) == np.arange(1, 6, dtype="i4")[np.newaxis, :]).all() + + +def test_add_inplace_operator_mutual_broadcast(): + get_queue_or_skip() + + x1 = dpt.ones((1, 10), dtype="i4") + x2 = dpt.ones((10, 1), dtype="i4") + + with pytest.raises(ValueError): + dpt.add._inplace_op(x1, x2) + + def test_add_inplace_errors(): get_queue_or_skip() try: @@ -425,27 +461,45 @@ def test_add_inplace_errors(): ar1 = dpt.ones(2, dtype="float32", sycl_queue=gpu_queue) ar2 = dpt.ones_like(ar1, sycl_queue=cpu_queue) with pytest.raises(ExecutionPlacementError): - ar1 += ar2 + dpt.add(ar1, ar2, out=ar1) ar1 = dpt.ones(2, dtype="float32") ar2 = dpt.ones(3, dtype="float32") with pytest.raises(ValueError): - ar1 += ar2 + dpt.add(ar1, ar2, out=ar1) ar1 = np.ones(2, dtype="float32") ar2 = dpt.ones(2, dtype="float32") with pytest.raises(TypeError): - ar1 += ar2 + dpt.add(ar1, ar2, out=ar1) ar1 = dpt.ones(2, dtype="float32") ar2 = dict() with pytest.raises(ValueError): - ar1 += ar2 + dpt.add(ar1, ar2, out=ar1) ar1 = dpt.ones((2, 1), dtype="float32") ar2 = dpt.ones((1, 2), dtype="float32") with pytest.raises(ValueError): - ar1 += ar2 + dpt.add(ar1, ar2, out=ar1) + + +def test_add_inplace_operator_errors(): + q1 = get_queue_or_skip() + q2 = get_queue_or_skip() + + x = dpt.ones(10, dtype="i4", sycl_queue=q1) + with pytest.raises(TypeError): + dpt.add._inplace_op(dict(), x) + + x.flags["W"] = False + with pytest.raises(ValueError): + dpt.add._inplace_op(x, 2) + + x_q1 = dpt.ones(10, dtype="i4", sycl_queue=q1) + x_q2 = dpt.ones(10, dtype="i4", sycl_queue=q2) + with pytest.raises(ExecutionPlacementError): + dpt.add._inplace_op(x_q1, x_q2) def test_add_inplace_same_tensors(): diff --git a/dpctl/tests/elementwise/test_bitwise_and.py b/dpctl/tests/elementwise/test_bitwise_and.py index 97d95e27f8..f90e03ea29 100644 --- a/dpctl/tests/elementwise/test_bitwise_and.py +++ b/dpctl/tests/elementwise/test_bitwise_and.py @@ -125,19 +125,3 @@ def test_bitwise_and_inplace_dtype_matrix(op1_dtype, op2_dtype): else: with pytest.raises(ValueError): ar1 &= ar2 - dpt.bitwise_and(ar1, ar2, out=ar1) - - # out is second arg - ar1 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q) - ar2 = dpt.ones_like(ar1, dtype=op2_dtype, sycl_queue=q) - if _can_cast(ar1.dtype, ar2.dtype, _fp16, _fp64): - dpt.bitwise_and(ar1, ar2, out=ar2) - assert dpt.all(ar2 == 1) - - ar3 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q)[::-1] - ar4 = dpt.ones(2 * sz, dtype=op2_dtype, sycl_queue=q)[::2] - dpt.bitwise_and(ar3, ar4, out=ar4) - dpt.all(ar4 == 1) - else: - with pytest.raises(ValueError): - dpt.bitwise_and(ar1, ar2, out=ar2) diff --git a/dpctl/tests/elementwise/test_bitwise_left_shift.py b/dpctl/tests/elementwise/test_bitwise_left_shift.py index 0d1eab4575..bd7aefe5af 100644 --- a/dpctl/tests/elementwise/test_bitwise_left_shift.py +++ b/dpctl/tests/elementwise/test_bitwise_left_shift.py @@ -133,19 +133,3 @@ def test_bitwise_left_shift_inplace_dtype_matrix(op1_dtype, op2_dtype): else: with pytest.raises(ValueError): ar1 <<= ar2 - dpt.bitwise_left_shift(ar1, ar2, out=ar1) - - # out is second arg - ar1 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q) - ar2 = dpt.ones_like(ar1, dtype=op2_dtype, sycl_queue=q) - if _can_cast(ar1.dtype, ar2.dtype, _fp16, _fp64): - dpt.bitwise_left_shift(ar1, ar2, out=ar2) - assert dpt.all(ar2 == 2) - - ar3 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q)[::-1] - ar4 = dpt.ones(2 * sz, dtype=op2_dtype, sycl_queue=q)[::2] - dpt.bitwise_left_shift(ar3, ar4, out=ar4) - dpt.all(ar4 == 2) - else: - with pytest.raises(ValueError): - dpt.bitwise_left_shift(ar1, ar2, out=ar2) diff --git a/dpctl/tests/elementwise/test_elementwise_classes.py b/dpctl/tests/elementwise/test_elementwise_classes.py index ac6af2d417..fb220b811a 100644 --- a/dpctl/tests/elementwise/test_elementwise_classes.py +++ b/dpctl/tests/elementwise/test_elementwise_classes.py @@ -118,7 +118,7 @@ def test_binary_class_nout(): assert nout == 1 -def test_biary_read_only_out(): +def test_binary_read_only_out(): get_queue_or_skip() x1 = dpt.ones(32, dtype=dpt.float32) x2 = dpt.ones_like(x1) @@ -126,3 +126,12 @@ def test_biary_read_only_out(): r.flags["W"] = False with pytest.raises(ValueError): binary_fn(x1, x2, out=r) + + +def test_binary_no_inplace_op(): + get_queue_or_skip() + x1 = dpt.ones(10, dtype="i4") + x2 = dpt.ones_like(x1) + + with pytest.raises(ValueError): + dpt.logaddexp._inplace_op(x1, x2) diff --git a/dpctl/tests/elementwise/test_floor_divide.py b/dpctl/tests/elementwise/test_floor_divide.py index e96c95d6cd..068a42f338 100644 --- a/dpctl/tests/elementwise/test_floor_divide.py +++ b/dpctl/tests/elementwise/test_floor_divide.py @@ -302,18 +302,3 @@ def test_floor_divide_inplace_dtype_matrix(op1_dtype, op2_dtype): with pytest.raises(ValueError): ar1 //= ar2 dpt.floor_divide(ar1, ar2, out=ar1) - - # out is second arg - ar1 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q) - ar2 = dpt.ones_like(ar1, dtype=op2_dtype, sycl_queue=q) - if _can_cast(ar1.dtype, ar2.dtype, _fp16, _fp64): - dpt.floor_divide(ar1, ar2, out=ar2) - assert dpt.all(ar2 == 1) - - ar3 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q)[::-1] - ar4 = dpt.ones(2 * sz, dtype=op2_dtype, sycl_queue=q)[::2] - dpt.floor_divide(ar3, ar4, out=ar4) - dpt.all(ar4 == 1) - else: - with pytest.raises(ValueError): - dpt.floor_divide(ar1, ar2, out=ar2) From 9529f0a8e8ba47fbf682d49559f54d41969b14cb Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 11 Sep 2024 08:01:37 -0700 Subject: [PATCH 06/10] Change per PR review to exception in UnaryElementwiseFunc --- dpctl/tensor/_elementwise_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/tensor/_elementwise_common.py b/dpctl/tensor/_elementwise_common.py index 87f4959de2..c6372e6e5e 100644 --- a/dpctl/tensor/_elementwise_common.py +++ b/dpctl/tensor/_elementwise_common.py @@ -215,7 +215,7 @@ def __call__(self, x, /, *, out=None, order="K"): if res_dt != out.dtype: raise ValueError( f"Output array of type {res_dt} is needed, " - f" got {out.dtype}" + f"got {out.dtype}" ) if ( From 54dd6b622db0afea954162646f6017df003bd88f Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 11 Sep 2024 11:37:28 -0700 Subject: [PATCH 07/10] Change per PR review by @oleksandr-pavlyk --- dpctl/tensor/_elementwise_common.py | 248 ++++++++++++++-------------- 1 file changed, 123 insertions(+), 125 deletions(-) diff --git a/dpctl/tensor/_elementwise_common.py b/dpctl/tensor/_elementwise_common.py index c6372e6e5e..5b26eb9225 100644 --- a/dpctl/tensor/_elementwise_common.py +++ b/dpctl/tensor/_elementwise_common.py @@ -930,140 +930,138 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): return out def _inplace_op(self, o1, o2): - if self.binary_inplace_fn_ is not None: - if not isinstance(o1, dpt.usm_ndarray): - raise TypeError( - "Expected first argument to be " - f"dpctl.tensor.usm_ndarray, got {type(o1)}" - ) - if not o1.flags.writable: - raise ValueError("provided left-hand side array is read-only") - q1, o1_usm_type = o1.sycl_queue, o1.usm_type - q2, o2_usm_type = _get_queue_usm_type(o2) - if q2 is None: - exec_q = q1 - res_usm_type = o1_usm_type - else: - exec_q = dpctl.utils.get_execution_queue((q1, q2)) - if exec_q is None: - raise ExecutionPlacementError( - "Execution placement can not be unambiguously inferred " - "from input arguments." - ) - res_usm_type = dpctl.utils.get_coerced_usm_type( - ( - o1_usm_type, - o2_usm_type, - ) - ) - dpctl.utils.validate_usm_type(res_usm_type, allow_none=False) - o1_shape = o1.shape - o2_shape = _get_shape(o2) - if not isinstance(o2_shape, (tuple, list)): - raise TypeError( - "Shape of second argument can not be inferred. " - "Expected list or tuple." - ) - try: - res_shape = _broadcast_shape_impl( - [ - o1_shape, - o2_shape, - ] - ) - except ValueError: - raise ValueError( - "operands could not be broadcast together with shapes " - f"{o1_shape} and {o2_shape}" + if self.binary_inplace_fn_ is None: + raise ValueError( + "binary function does not have a dedicated in-place " + "implementation" + ) + if not isinstance(o1, dpt.usm_ndarray): + raise TypeError( + "Expected first argument to be " + f"dpctl.tensor.usm_ndarray, got {type(o1)}" + ) + if not o1.flags.writable: + raise ValueError("provided left-hand side array is read-only") + q1, o1_usm_type = o1.sycl_queue, o1.usm_type + q2, o2_usm_type = _get_queue_usm_type(o2) + if q2 is None: + exec_q = q1 + res_usm_type = o1_usm_type + else: + exec_q = dpctl.utils.get_execution_queue((q1, q2)) + if exec_q is None: + raise ExecutionPlacementError( + "Execution placement can not be unambiguously inferred " + "from input arguments." ) - - if res_shape != o1_shape: - raise ValueError( - "The shape of the non-broadcastable left-hand " - f"side {o1_shape} is inconsistent with the " - f"broadcast shape {res_shape}." + res_usm_type = dpctl.utils.get_coerced_usm_type( + ( + o1_usm_type, + o2_usm_type, ) - - sycl_dev = exec_q.sycl_device - o1_dtype = o1.dtype - o2_dtype = _get_dtype(o2, sycl_dev) - if not _validate_dtype(o2_dtype): - raise ValueError("Operand has an unsupported data type") - - o1_dtype, o2_dtype = self.weak_type_resolver_( - o1_dtype, o2_dtype, sycl_dev + ) + dpctl.utils.validate_usm_type(res_usm_type, allow_none=False) + o1_shape = o1.shape + o2_shape = _get_shape(o2) + if not isinstance(o2_shape, (tuple, list)): + raise TypeError( + "Shape of second argument can not be inferred. " + "Expected list or tuple." + ) + try: + res_shape = _broadcast_shape_impl( + [ + o1_shape, + o2_shape, + ] + ) + except ValueError: + raise ValueError( + "operands could not be broadcast together with shapes " + f"{o1_shape} and {o2_shape}" ) - buf_dt, res_dt = _find_buf_dtype_in_place_op( - o1_dtype, - o2_dtype, - self.result_type_resolver_fn_, - sycl_dev, + if res_shape != o1_shape: + raise ValueError( + "The shape of the non-broadcastable left-hand " + f"side {o1_shape} is inconsistent with the " + f"broadcast shape {res_shape}." ) - if res_dt is None: - raise ValueError( - f"function '{self.name_}' does not support input types " - f"({o1_dtype}, {o2_dtype}), " - "and the inputs could not be safely coerced to any " - "supported types according to the casting rule " - "''same_kind''." - ) + sycl_dev = exec_q.sycl_device + o1_dtype = o1.dtype + o2_dtype = _get_dtype(o2, sycl_dev) + if not _validate_dtype(o2_dtype): + raise ValueError("Operand has an unsupported data type") - if res_dt != o1_dtype: - raise ValueError( - f"Output array of type {res_dt} is needed, " - f"got {o1_dtype}" - ) + o1_dtype, o2_dtype = self.weak_type_resolver_( + o1_dtype, o2_dtype, sycl_dev + ) - _manager = SequentialOrderManager[exec_q] - if isinstance(o2, dpt.usm_ndarray): - src2 = o2 - if ( - ti._array_overlap(o2, o1) - and not ti._same_logical_tensors(o2, o1) - and buf_dt is None - ): - buf_dt = o2_dtype - else: - src2 = dpt.asarray(o2, dtype=o2_dtype, sycl_queue=exec_q) - if buf_dt is None: - if src2.shape != res_shape: - src2 = dpt.broadcast_to(src2, res_shape) - dep_evs = _manager.submitted_events - ht_, comp_ev = self.binary_inplace_fn_( - lhs=o1, - rhs=src2, - sycl_queue=exec_q, - depends=dep_evs, - ) - _manager.add_event_pair(ht_, comp_ev) - else: - buf = dpt.empty_like(src2, dtype=buf_dt) - dep_evs = _manager.submitted_events - ( - ht_copy_ev, - copy_ev, - ) = ti._copy_usm_ndarray_into_usm_ndarray( - src=src2, - dst=buf, - sycl_queue=exec_q, - depends=dep_evs, - ) - _manager.add_event_pair(ht_copy_ev, copy_ev) + buf_dt, res_dt = _find_buf_dtype_in_place_op( + o1_dtype, + o2_dtype, + self.result_type_resolver_fn_, + sycl_dev, + ) - buf = dpt.broadcast_to(buf, res_shape) - ht_, bf_ev = self.binary_inplace_fn_( - lhs=o1, - rhs=buf, - sycl_queue=exec_q, - depends=[copy_ev], - ) - _manager.add_event_pair(ht_, bf_ev) + if res_dt is None: + raise ValueError( + f"function '{self.name_}' does not support input types " + f"({o1_dtype}, {o2_dtype}), " + "and the inputs could not be safely coerced to any " + "supported types according to the casting rule " + "''same_kind''." + ) - return o1 - else: + if res_dt != o1_dtype: raise ValueError( - "binary function does not have a dedicated in-place " - "implementation" + f"Output array of type {res_dt} is needed, " f"got {o1_dtype}" ) + + _manager = SequentialOrderManager[exec_q] + if isinstance(o2, dpt.usm_ndarray): + src2 = o2 + if ( + ti._array_overlap(o2, o1) + and not ti._same_logical_tensors(o2, o1) + and buf_dt is None + ): + buf_dt = o2_dtype + else: + src2 = dpt.asarray(o2, dtype=o2_dtype, sycl_queue=exec_q) + if buf_dt is None: + if src2.shape != res_shape: + src2 = dpt.broadcast_to(src2, res_shape) + dep_evs = _manager.submitted_events + ht_, comp_ev = self.binary_inplace_fn_( + lhs=o1, + rhs=src2, + sycl_queue=exec_q, + depends=dep_evs, + ) + _manager.add_event_pair(ht_, comp_ev) + else: + buf = dpt.empty_like(src2, dtype=buf_dt) + dep_evs = _manager.submitted_events + ( + ht_copy_ev, + copy_ev, + ) = ti._copy_usm_ndarray_into_usm_ndarray( + src=src2, + dst=buf, + sycl_queue=exec_q, + depends=dep_evs, + ) + _manager.add_event_pair(ht_copy_ev, copy_ev) + + buf = dpt.broadcast_to(buf, res_shape) + ht_, bf_ev = self.binary_inplace_fn_( + lhs=o1, + rhs=buf, + sycl_queue=exec_q, + depends=[copy_ev], + ) + _manager.add_event_pair(ht_, bf_ev) + + return o1 From 0297fbe0d558c8fbab2ad1b57835806e37120c64 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 11 Sep 2024 12:19:49 -0700 Subject: [PATCH 08/10] Add changes to in-place element-wise operators to changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb7f95bde5..62efdb0ac5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +* `"same_kind"`-style casting for `tensor.usm_ndarray` in-place mathematical operators[gh-1827](https://github.com/IntelPython/dpctl/pull/1827) + ### Change ### Fixed From 7ba5c310363a5f749eba4cd8a66bd09ccc55da9d Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 11 Sep 2024 12:38:45 -0700 Subject: [PATCH 09/10] Add comments explaining logic behind various stages of `test_add_inplace_dtype_matrix` --- dpctl/tests/elementwise/test_add.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dpctl/tests/elementwise/test_add.py b/dpctl/tests/elementwise/test_add.py index e7838005b0..e331df6520 100644 --- a/dpctl/tests/elementwise/test_add.py +++ b/dpctl/tests/elementwise/test_add.py @@ -358,6 +358,8 @@ def test_add_inplace_dtype_matrix(op1_dtype, op2_dtype): dev = q.sycl_device _fp16 = dev.has_aspect_fp16 _fp64 = dev.has_aspect_fp64 + # operators use a different Python implementation which permits + # same kind style casting if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64, casting="same_kind"): ar1 += ar2 assert ( @@ -374,6 +376,9 @@ def test_add_inplace_dtype_matrix(op1_dtype, op2_dtype): with pytest.raises(ValueError): ar1 += ar2 + # here, test the special case where out is the first argument + # so an in-place kernel is used for efficiency + # this covers a specific branch in the BinaryElementwiseFunc logic ar1 = dpt.ones(sz, dtype=op1_dtype) ar2 = dpt.ones_like(ar1, dtype=op2_dtype) if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): From 7599b37f45e000af4286c9844f770d3e6ebe5e15 Mon Sep 17 00:00:00 2001 From: ndgrigorian <46709016+ndgrigorian@users.noreply.github.com> Date: Wed, 11 Sep 2024 12:49:07 -0700 Subject: [PATCH 10/10] Fix typo in changelog entry Co-authored-by: Oleksandr Pavlyk --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 62efdb0ac5..9eb4bbc220 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -* `"same_kind"`-style casting for `tensor.usm_ndarray` in-place mathematical operators[gh-1827](https://github.com/IntelPython/dpctl/pull/1827) +* `"same_kind"`-style casting for `tensor.usm_ndarray` in-place mathematical operators [gh-1827](https://github.com/IntelPython/dpctl/pull/1827) ### Change