From efe804b8195cd0f6d39fe3ef50338a71ea93734a Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Mon, 7 Oct 2024 17:26:34 -0500 Subject: [PATCH 1/3] implement dpnp.vecdot and dpnp.linalg.vecdot --- doc/reference/binary.rst | 4 +- doc/reference/linalg.rst | 2 + dpnp/dpnp_iface_linearalgebra.py | 136 +++++++- dpnp/dpnp_utils/dpnp_utils_linearalgebra.py | 328 +++++++++++++++----- dpnp/linalg/dpnp_iface_linalg.py | 56 ++++ tests/test_mathematical.py | 16 +- tests/test_product.py | 283 +++++++++++++++++ tests/test_sycl_queue.py | 29 ++ tests/test_umath.py | 2 - tests/test_usm_type.py | 26 ++ 10 files changed, 772 insertions(+), 110 deletions(-) diff --git a/doc/reference/binary.rst b/doc/reference/binary.rst index 61382a6ef6d9..97f01c895dbf 100644 --- a/doc/reference/binary.rst +++ b/doc/reference/binary.rst @@ -1,5 +1,5 @@ Bit-wise operations -================= +=================== .. https://numpy.org/doc/stable/reference/routines.bitwise.html @@ -22,7 +22,6 @@ Element-wise bit operations dpnp.bitwise_right_shift dpnp.bitwise_count - Bit packing ----------- @@ -33,7 +32,6 @@ Bit packing dpnp.packbits dpnp.unpackbits - Output formatting ----------------- diff --git a/doc/reference/linalg.rst b/doc/reference/linalg.rst index 58d667f94adc..39169a508e91 100644 --- a/doc/reference/linalg.rst +++ b/doc/reference/linalg.rst @@ -87,6 +87,7 @@ Other matrix operations ----------------------- .. autosummary:: :toctree: generated/ + :nosignatures: dpnp.diagonal dpnp.linalg.diagonal (Array API compatible) @@ -96,5 +97,6 @@ Exceptions ---------- .. autosummary:: :toctree: generated/ + :nosignatures: dpnp.linalg.linAlgError diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index 924099ad2369..f6c83f4e50c6 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -48,6 +48,7 @@ dpnp_dot, dpnp_kron, dpnp_matmul, + dpnp_vecdot, ) __all__ = [ @@ -60,6 +61,7 @@ "outer", "tensordot", "vdot", + "vecdot", ] @@ -728,7 +730,6 @@ def matmul( dtype=None, subok=True, signature=None, - extobj=None, axes=None, axis=None, ): @@ -758,12 +759,12 @@ def matmul( order : {"C", "F", "A", "K", None}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. - axes : {list of tuples}, optional + axes : {None, list of tuples}, optional A list of tuples with indices of axes the matrix product should operate on. For instance, for the signature of ``(i,j),(j,k)->(i,k)``, the base elements are 2d matrices and these are taken to be stored in the two last axes of each argument. The corresponding axes keyword would be - [(-2, -1), (-2, -1), (-2, -1)]. + ``[(-2, -1), (-2, -1), (-2, -1)]``. Default: ``None``. Returns @@ -774,8 +775,8 @@ def matmul( Limitations ----------- - Keyword arguments `subok`, `signature`, `extobj`, and `axis` are - only supported with their default value. + Keyword arguments `subok`, `signature`, and `axis` are only supported with + their default values. Otherwise ``NotImplementedError`` exception will be raised. See Also @@ -834,7 +835,7 @@ def matmul( """ - if subok is False: + if not subok: raise NotImplementedError( "subok keyword argument is only supported by its default value." ) @@ -842,10 +843,6 @@ def matmul( raise NotImplementedError( "signature keyword argument is only supported by its default value." ) - if extobj is not None: - raise NotImplementedError( - "extobj keyword argument is only supported by its default value." 
- ) if axis is not None: raise NotImplementedError( "axis keyword argument is only supported by its default value." @@ -1135,6 +1132,9 @@ def vdot(a, b): -------- :obj:`dpnp.dot` : Returns the dot product. :obj:`dpnp.matmul` : Returns the matrix product. + :obj:`dpnp.vecdot` : Vector dot product of two arrays. + :obj:`dpnp.linalg.vecdot` : Array API compatible version of + :obj:`dpnp.vecdot`. Examples -------- @@ -1178,3 +1178,119 @@ def vdot(a, b): # dot product of flatten arrays return dpnp_dot(dpnp.ravel(a), dpnp.ravel(b), out=None, conjugate=True) + + +def vecdot( + x1, + x2, + /, + out=None, + *, + casting="same_kind", + order="K", + dtype=None, + subok=True, + signature=None, + axes=None, + axis=None, +): + r""" + Computes the vector dot product. + + Let :math:`\mathbf{a}` be a vector in `x1` and :math:`\mathbf{b}` be + a corresponding vector in `x2`. The dot product is defined as: + + .. math:: + \mathbf{a} \cdot \mathbf{b} = \sum_{i=0}^{n-1} \overline{a_i}b_i + + where the sum is over the last dimension (unless axis is specified) and + where :math:`\overline{a_i}` denotes the complex conjugate if :math:`a_i` + is complex and the identity otherwise. + + For full documentation refer to :obj:`numpy.vecdot`. + + Parameters + ---------- + x1 : {dpnp.ndarray, usm_ndarray} + First input array. + x2 : {dpnp.ndarray, usm_ndarray} + Second input array. + out : {None, dpnp.ndarray, usm_ndarray}, optional + A location into which the result is stored. If provided, it must have + a shape that the broadcasted shape of `x1` and `x2` with the last axis + removed. If not provided or ``None``, a freshly-allocated array is + used. + Default: ``None``. + dtype : {None, dtype}, optional + Type to use in computing the vector dot product. By default, the + returned array will have data type that is determined by considering + Promotion Type Rule and device capabilities. + casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional + Controls what kind of data casting may occur. + Default: ``"same_kind"``. + order : {"C", "F", "A", "K", None}, optional + Memory layout of the newly output array, if parameter `out` is ``None``. + Default: ``"K"``. + axes : {None, list of tuples}, optional + A list of tuples with indices of axes the matrix product should operate + on. For instance, for the signature of ``(i),(i)->()``, the base + elements are vectors and these are taken to be stored in the last axes + of each argument. The corresponding axes keyword would be + ``[(-1,), (-1), ()]``. + Default: ``None``. + axis : {None, int}, optional + Axis over which to compute the dot product. This is a short-cut for + passing in axes with entries of ``(axis,)`` for each + single-core-dimension argument and ``()`` for all others. For instance, + for a signature ``(i),(i)->()``, it is equivalent to passing in + ``axes=[(axis,), (axis,), ()]``. + Default: ``None``. + + Returns + ------- + out : dpnp.ndarray + The vector dot product of the inputs. + This is a 0-d array only when both `x1`, `x2` are 1-d vectors. + + Limitations + ----------- + Keyword arguments `subok`, and `signature` are only supported with their + default values. Otherwise ``NotImplementedError`` exception will be raised. + + See Also + -------- + :obj:`dpnp.linalg.vecdot` : Array API compatible version. + :obj:`dpnp.vdot` : Complex-conjugating dot product. + :obj:`dpnp.einsum` : Einstein summation convention. + + Examples + -------- + Get the projected size along a given normal for an array of vectors. 
+ + >>> import dpnp as np + >>> v = np.array([[0., 5., 0.], [0., 0., 10.], [0., 6., 8.]]) + >>> n = np.array([0., 0.6, 0.8]) + >>> np.vecdot(v, n) + array([ 3., 8., 10.]) + + """ + + if not subok: + raise NotImplementedError( + "subok keyword argument is only supported by its default value." + ) + if signature is not None: + raise NotImplementedError( + "signature keyword argument is only supported by its default value." + ) + + return dpnp_vecdot( + x1, + x2, + out=out, + casting=casting, + order=order, + dtype=dtype, + axes=axes, + axis=axis, + ) diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py index e15bd93d7bbd..9d7459ada02c 100644 --- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py +++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py @@ -36,7 +36,7 @@ from dpnp.dpnp_array import dpnp_array from dpnp.dpnp_utils import get_usm_allocations -__all__ = ["dpnp_cross", "dpnp_dot", "dpnp_kron", "dpnp_matmul"] +__all__ = ["dpnp_cross", "dpnp_dot", "dpnp_kron", "dpnp_matmul", "dpnp_vecdot"] def _compute_res_dtype(*arrays, sycl_queue, dtype=None, casting="no"): @@ -187,7 +187,7 @@ def _define_contig_flag(x): return flag, x_is_c_contiguous, x_is_f_contiguous -def _define_dim_flags(x, pos): +def _define_dim_flags(x, axis): """ Define useful flags for the main calculation in dpnp_matmul. x_is_1D: `x` is 1D array or inherently 1D (all dimensions are equal to one @@ -200,14 +200,14 @@ def _define_dim_flags(x, pos): if x.shape = (3, 4, 1, 2), then x_base_is_1D = True """ - index = -1 if pos == 0 else -2 x_shape = x.shape x_ndim = x.ndim x_is_1D = x_ndim == 1 if numpy.prod(x_shape) != 0: + # if the first condition is valid, the 2nd condition is not checked # the 2nd condition is only expected to be checked for - # ND-arrays (N>1) since index is in [-2, -1] - x_is_1D = x_is_1D or numpy.prod(x_shape) == x_shape[index] + # ND-arrays (N>1) since axis could be -2 + x_is_1D = x_is_1D or numpy.prod(x_shape) == x_shape[axis] x_is_2D = False if not x_is_1D: @@ -220,7 +220,7 @@ def _define_dim_flags(x, pos): return x_is_2D, x_is_1D, x_base_is_1D -def _get_result_shape(x1, x2, out, np_flag): +def _get_result_shape(x1, x2, out, func, np_flag): """ Three task are completed in this function: - Get the shape of the result array. 
@@ -230,36 +230,55 @@ def _get_result_shape(x1, x2, out, np_flag): x1_ndim = x1.ndim x2_ndim = x2.ndim - x1_shape = x1.shape - x2_shape = x2.shape - if x1_ndim == 0: raise ValueError( - "Input array 0 does not have enough dimensions (has 0, but requires at least 1)" + "The first input array does not have enough dimensions (has 0, but requires at least 1)" ) if x2_ndim == 0: raise ValueError( - "Input array 1 does not have enough dimensions (has 0, but requires at least 1)" + "The second input array does not have enough dimensions (has 0, but requires at least 1)" + ) + + if func == "matmul": + x1, x2, result_shape = _get_result_shape_matmul( + x1, x2, x1_ndim, x2_ndim ) + else: # func == "vecdot" + x1, x2, result_shape = _get_result_shape_vecdot( + x1, x2, x1_ndim, x2_ndim + ) + + if out is not None: + out_shape = out.shape + if out_shape != result_shape and not np_flag: + _shape_error(result_shape, out_shape, None, err_msg=2) + + return x1, x2, result_shape + + +def _get_result_shape_matmul(x1, x2, x1_ndim, x2_ndim): + + x1_shape = x1.shape + x2_shape = x2.shape if x1_ndim == 1 and x2_ndim == 1: if x1_shape[-1] != x2_shape[-1]: - _shape_error(x1_shape[-1], x2_shape[-1], None, err_msg=0) + _shape_error(x1_shape[-1], x2_shape[-1], "matmul", err_msg=0) result_shape = () elif x1_ndim == 1: if x1_shape[-1] != x2_shape[-2]: - _shape_error(x1_shape[-1], x2_shape[-2], None, err_msg=0) + _shape_error(x1_shape[-1], x2_shape[-2], "matmul", err_msg=0) result_shape = x2_shape[:-2] + (x2_shape[-1],) elif x2_ndim == 1: if x1_shape[-1] != x2_shape[-1]: - _shape_error(x1_shape[-1], x2_shape[-1], None, err_msg=0) + _shape_error(x1_shape[-1], x2_shape[-1], "matmul", err_msg=0) result_shape = x1_shape[:-1] else: # at least 2D - x1_is_2D, x1_is_1D, _ = _define_dim_flags(x1, pos=0) - x2_is_2D, x2_is_1D, _ = _define_dim_flags(x2, pos=1) + x1_is_2D, x1_is_1D, _ = _define_dim_flags(x1, axis=-1) + x2_is_2D, x2_is_1D, _ = _define_dim_flags(x2, axis=-2) if x1_shape[-1] != x2_shape[-2]: - _shape_error(x1_shape[-1], x2_shape[-2], None, err_msg=0) + _shape_error(x1_shape[-1], x2_shape[-2], "matmul", err_msg=0) if x1_ndim == 2 and x2_ndim == 2: result_shape = (x1_shape[-2], x2_shape[-1]) @@ -292,33 +311,56 @@ def _get_result_shape(x1, x2, out, np_flag): if not (x2_is_2D or x2_is_1D): x2 = dpnp.repeat(x2, x1_shape[i], axis=i) else: - _shape_error( - x1_shape[:-2], x2_shape[:-2], None, err_msg=2 - ) + _shape_error(x1_shape, x2_shape, "matmul", err_msg=1) result_shape = tuple(tmp_shape) + (x1.shape[-2], x2.shape[-1]) - if out is not None: - out_shape = out.shape - if out_shape != result_shape and not np_flag: - len_out = len(out_shape) - len_res = len(result_shape) - if len_out != len_res: - _shape_error(len_out, len_res, None, err_msg=4) - for i in range(len_out): - if out_shape[i] != result_shape[i]: - if i == len_out - 1: - _shape_error( - out_shape[i], result_shape[i], 1, err_msg=1 - ) - elif i == len_out - 2: - _shape_error( - out_shape[i], result_shape[i], 0, err_msg=1 - ) - else: - _shape_error( - out_shape[:-2], result_shape[:-2], None, err_msg=3 - ) + return x1, x2, result_shape + + +def _get_result_shape_vecdot(x1, x2, x1_ndim, x2_ndim): + + x1_shape = x1.shape + x2_shape = x2.shape + + if x1_shape[-1] != x2_shape[-1]: + _shape_error(x1_shape[-1], x2_shape[-1], "vecdot", err_msg=0) + + _, x1_is_1D, _ = _define_dim_flags(x1, axis=-1) + _, x2_is_1D, _ = _define_dim_flags(x2, axis=-1) + + if x1_ndim == 1 and x2_ndim == 1: + result_shape = () + elif x1_is_1D: + result_shape = x2_shape[:-1] + elif x2_is_1D: + 
result_shape = x1_shape[:-1] + else: # at least 2D + if x1_ndim != x2_ndim: + diff = abs(x1_ndim - x2_ndim) + if x1_ndim < x2_ndim: + x1 = dpnp.reshape(x1, ((1,) * diff + x1.shape)) + x1_shape = x1.shape + else: + x2 = dpnp.reshape(x2, ((1,) * diff + x2.shape)) + x2_shape = x2.shape + + # examining the option to align inputs when their + # shapes differ but the shape of one of them is 1 + # in that dimension (similar to braodcasting concept) + tmp_shape = list(x1_shape[:-1]) + for i in range(len(tmp_shape)): + if x1_shape[i] != x2_shape[i]: + if x1_shape[i] == 1: + tmp_shape[i] = x2_shape[i] + # Unlike `matmul`, no need to duplicate data for the + # dimension with shape equal to one; dpt.vecdot handles it + elif x2_shape[i] == 1: + pass + else: + _shape_error(x1_shape, x2_shape, "vecdot", err_msg=1) + + result_shape = tuple(tmp_shape) return x1, x2, result_shape @@ -423,37 +465,29 @@ def _gemm_matmul(exec_q, x1, x2, res): return res -def _shape_error(a, b, core_dim, err_msg): +def _shape_error(shape1, shape2, func, err_msg): + + if func == "matmul": + signature = "(n?,k),(k,m?)->(n?,m?)" + else: # func == "vecdot" + signature = "(n?,),(n?,)->()" + if err_msg == 0: raise ValueError( "Input arrays have a mismatch in their core dimensions. " "The core dimensions should follow this signature: " - f"(n?,k),(k,m?)->(n?,m?) (size {a} is different from {b})" + f"{signature} (size {shape1} is different from {shape2})" ) elif err_msg == 1: raise ValueError( - f"Output array has a mismatch in its core dimension {core_dim}. " - "The core dimensions should follow this signature: " - f"(n?,k),(k,m?)->(n?,m?) (size {a} is different from {b})" + "The shapes of the input arrays are incompatible. " + f"The first input array has shape {shape1} and the second input " + f"array has shape {shape2}. " + f"These cannot be broadcast together for '{func}' function." ) elif err_msg == 2: raise ValueError( - "Leading element(s) of the input arrays' shape do not match and " - "they could not be broadcast together, the leading element(s) of " - f"the input array 0 is {a} and it is different from leading " - f"element(s) of the input array 1 which is {b}." - ) - elif err_msg == 3: - raise ValueError( - "The shape of the output array does not have similar leading " - "elements to the shape of input arrays, the leading element(s) of " - f"the output array is {a} and it is different from the leading " - f"element(s) of the input arrays which is {b}." - ) - elif err_msg == 4: - raise ValueError( - "Output array does not have enough dimensions " - f"(has {a} while requires {b})" + f"Expected output array of shape {shape1}, but got {shape2}." ) @@ -479,7 +513,7 @@ def _standardize_strides_to_nonzero(strides, shape): return stndrd_strides -def _validate_axes(x1, x2, axes): +def _validate_axes(x1, x2, axes, func): """Check axes is valid for matmul function.""" def _validate_internal(axes, i, ndim): @@ -514,19 +548,24 @@ def _validate_internal(axes, i, ndim): if not isinstance(axes, list): raise TypeError("Axes should be a list.") - else: - if len(axes) != 3: - raise ValueError( - "Axes should be a list of three tuples for inputs and output." - ) + elif len(axes) != 3: + raise ValueError( + "Axes should be a list of three tuples: two inputs and one output." 
+ ) - axes[0] = _validate_internal(axes[0], 0, x1.ndim) - axes[1] = _validate_internal(axes[1], 1, x2.ndim) + if func == "matmul": + x1_ndim = x1.ndim + x2_ndim = x2.ndim + else: # func == "vecdot" + x1_ndim = x2_ndim = 1 - if x1.ndim == 1 and x2.ndim == 1: + axes[0] = _validate_internal(axes[0], 0, x1_ndim) + axes[1] = _validate_internal(axes[1], 1, x2_ndim) + + if x1_ndim == 1 and x2_ndim == 1: if axes[2] != (): raise TypeError("Axes item 2 should be an empty tuple.") - elif x1.ndim == 1 or x2.ndim == 1: + elif x1_ndim == 1 or x2_ndim == 1: axes[2] = _validate_internal(axes[2], 2, 1) else: axes[2] = _validate_internal(axes[2], 2, 2) @@ -709,10 +748,8 @@ def dpnp_matmul( """ Return the matrix product of two arrays. - The main calculation is done by calling an extension function - for BLAS library of OneMKL. Depending on dimension of `x1` and `x2` arrays, - it will be either ``gemm`` (for one- and two-dimentional arrays) or - ``gemm_batch``(for others). + The main calculation is performed by calling an extension function + for BLAS library of OneMKL. """ @@ -726,7 +763,7 @@ def dpnp_matmul( ) if order in ["a", "A"]: - if x1.flags.f_contiguous and x2.flags.f_contiguous: + if x1.flags.fnc and x2.flags.fnc: order = "F" else: order = "C" @@ -734,7 +771,7 @@ def dpnp_matmul( x1_ndim = x1.ndim x2_ndim = x2.ndim if axes is not None: - axes = _validate_axes(x1, x2, axes) + axes = _validate_axes(x1, x2, axes, "matmul") axes_x1, axes_x2, axes_res = axes axes_x1 = normalize_axis_tuple(axes_x1, x1_ndim, "axis") @@ -753,14 +790,14 @@ def dpnp_matmul( elif len(axes_res) == 1: out = dpnp.moveaxis(out, axes_res, (-1,)) - # With these conditions, the result is a 0D array. However, - # NumPy allows out to have any shape and the result is expanded to it + # When inputs are 1-D arrays, the result is a 0-D array. For this case, + # NumPy allows out keyword to have any shape and the result is broadcast to it NumPy_special_behavior = ( out is not None and x1_ndim == 1 and x2_ndim == 1 and out.shape != () ) x1, x2, result_shape = _get_result_shape( - x1, x2, out, NumPy_special_behavior + x1, x2, out, "matmul", NumPy_special_behavior ) # Determine the appropriate data types @@ -772,8 +809,8 @@ def dpnp_matmul( transpose = False x1_shape = x1.shape x2_shape = x2.shape - x1_is_2D, x1_is_1D, x1_base_is_1D = _define_dim_flags(x1, pos=0) - x2_is_2D, x2_is_1D, x2_base_is_1D = _define_dim_flags(x2, pos=1) + x1_is_2D, x1_is_1D, x1_base_is_1D = _define_dim_flags(x1, axis=-1) + x2_is_2D, x2_is_1D, x2_base_is_1D = _define_dim_flags(x2, axis=-2) # TODO: investigate usage of syrk function from BLAS in # case of a.T @ a and a @ a.T to gain performance. 
@@ -939,3 +976,128 @@ def dpnp_matmul( # out and out_orig contain the same data but they have different shape return out_orig return result + + +def dpnp_vecdot( + x1, + x2, + /, + out=None, + *, + casting="same_kind", + order="K", + dtype=None, + axes=None, + axis=None, +): + """Vector dot product of two arrays.""" + + dpnp.check_supported_arrays_type(x1, x2) + res_usm_type, exec_q = get_usm_allocations([x1, x2]) + if out is not None: + dpnp.check_supported_arrays_type(out) + if dpctl.utils.get_execution_queue((exec_q, out.sycl_queue)) is None: + raise ExecutionPlacementError( + "Input and output allocation queues are not compatible" + ) + + if order in ["a", "A"]: + if x1.flags.fnc and x2.flags.fnc: + order = "F" + else: + order = "C" + + x1_ndim = x1.ndim + x2_ndim = x2.ndim + if axes is None and axis is None: + # default behavior with axis=-1 + pass + elif axes is not None: + if axis is not None: + raise TypeError("cannot specify both `axis` and `axes`.") + + axes = _validate_axes(x1, x2, axes, "vecdot") + + axes_x1, axes_x2, axes_res = axes + axes_x1 = normalize_axis_tuple(axes_x1, x1_ndim, "axis") + axes_x2 = normalize_axis_tuple(axes_x2, x2_ndim, "axis") + + # Move the axes that are going to be used in dot product, + # to the end of "x1" and "x2" + x1 = dpnp.moveaxis(x1, axes_x1, -1) if x1_ndim != 1 else x1 + x2 = dpnp.moveaxis(x2, axes_x2, -1) if x2_ndim != 1 else x2 + else: + x1 = dpnp.moveaxis(x1, axis, -1) if x1_ndim != 1 else x1 + x2 = dpnp.moveaxis(x2, axis, -1) if x2_ndim != 1 else x2 + + # When inputs are 1-D arrays, the result is a 0-D array. For this case, + # NumPy allows out keyword to have any shape and the result is broadcast to it + NumPy_special_behavior = ( + out is not None and x1_ndim == 1 and x2_ndim == 1 and out.shape != () + ) + + x1, x2, result_shape = _get_result_shape( + x1, x2, out, "vecdot", NumPy_special_behavior + ) + + # Determine the appropriate data types + _, res_dtype = _compute_res_dtype( + x1, x2, dtype=dtype, casting=casting, sycl_queue=exec_q + ) + + _, x1_is_1D, _ = _define_dim_flags(x1, axis=-1) + _, x2_is_1D, _ = _define_dim_flags(x2, axis=-1) + + if numpy.prod(result_shape) == 0 or x1.size == 0 or x2.size == 0: + order = "C" if order in "kK" else order + result = _create_result_array( + x1, + x2, + out, + shape=result_shape, + dtype=res_dtype, + usm_type=res_usm_type, + sycl_queue=exec_q, + order=order, + ) + if x1.size == 0 or x2.size == 0: + result.fill(0) + return result + elif x1_is_1D and x2_is_1D: + call_flag = "dot" + # arrays are inehrently 1D, make them 1D + x1 = dpnp.ravel(x1) + x2 = dpnp.ravel(x2) + else: + # TODO: if a batch version of MKL dot is implemented, can be used here + call_flag = "vecdot" + + # dispatch to proper function call + if call_flag == "dot": + if out is not None and out.shape != (): + result = dpnp_dot(x1, x2, out=None, conjugate=True) + else: + result = dpnp_dot(x1, x2, out=out, conjugate=True) + else: # call_flag == "vecdot" + x1_usm = dpnp.get_usm_ndarray(x1) + x2_usm = dpnp.get_usm_ndarray(x2) + result = dpnp_array._create_from_usm_ndarray( + dpt.vecdot(x1_usm, x2_usm, axis=-1) + ) + + if NumPy_special_behavior: + result = dpnp.tile(result, out.shape) + elif result.shape != result_shape: + result = dpnp.reshape(result, result_shape) + + if out is None: + # If `order` was not passed as default + # we need to update it to match the passed `order`. 
+ if order not in "kK": + return dpnp.asarray(result, order=order) + # dpnp.ascontiguousarray changes 0-D array to 1-D array + if result.ndim == 0: + return result + return dpnp.ascontiguousarray(result) + + return dpnp.get_result_array(result, out, casting=casting) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index ce7504c18d69..9cf3eea2edec 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -95,6 +95,7 @@ "tensorinv", "tensorsolve", "trace", + "vecdot", "vector_norm", ] @@ -2161,6 +2162,61 @@ def trace(x, /, *, offset=0, dtype=None): return dpnp.trace(x, offset, axis1=-2, axis2=-1, dtype=dtype) +def vecdot(x1, x2, /, *, axis=-1): + r""" + Computes the vector dot product. + + This function is restricted to arguments compatible with the Array API, + contrary to :obj:`dpnp.vecdot`. + + Let :math:`\mathbf{a}` be a vector in `x1` and :math:`\mathbf{b}` be + a corresponding vector in `x2`. The dot product is defined as: + + .. math:: + \mathbf{a} \cdot \mathbf{b} = \sum_{i=0}^{n-1} \overline{a_i}b_i + + over the dimension specified by `axis` and where :math:`\overline{a_i}` + denotes the complex conjugate if :math:`a_i` is complex and the identity + otherwise. + + For full documentation refer to :obj:`numpy.linalg.vecdot`. + + Parameters + ---------- + x1 : {dpnp.ndarray, usm_ndarray} + First input array. + x2 : {dpnp.ndarray, usm_ndarray} + Second input array. + axis : int, optional + Axis over which to compute the dot product. + Default: ``-1``. + + Returns + ------- + out : dpnp.ndarray + The vector dot product of the inputs. + + See Also + -------- + :obj:`dpnp.vecdot` : Similar function with support for more + keyword arguments. + :obj:`dpnp.vdot` : Complex-conjugating dot product. + + Examples + -------- + Get the projected size along a given normal for an array of vectors. + + >>> import dpnp as np + >>> v = np.array([[0., 5., 0.], [0., 0., 10.], [0., 6., 8.]]) + >>> n = np.array([0., 0.6, 0.8]) + >>> np.linalg.vecdot(v, n) + array([ 3., 8., 10.]) + + """ + + return dpnp.vecdot(x1, x2, axis=axis) + + def vector_norm(x, /, *, axis=None, keepdims=False, ord=2): """ Computes the vector norm of a vector (or batch of vectors) `x`. 
diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index e511f438d3e5..a3c29eac3223 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -3929,9 +3929,8 @@ def test_matmul_out3(self, trans, dtype): ], ) def test_matmul_out_0D(self, out_shape): - # for matmul of 0-D arrays with out keyword, - # NumPy repeats the data to match the shape - # of output array + # for matmul of 1-D arrays, output is 0-D and if out keyword is given + # NumPy repeats the data to match the shape of output array a = numpy.arange(3) b = dpnp.asarray(a) @@ -4093,15 +4092,8 @@ def test_matmul_not_implemented(self): dpnp.matmul(a1, a2, subok=False) with pytest.raises(NotImplementedError): - dpnp.matmul( - a1, a2, signature=(dpnp.float32, dpnp.float32, dpnp.float32) - ) - - def custom_error_callback(err): - print("Custom error callback triggered with error:", err) - - with pytest.raises(NotImplementedError): - dpnp.matmul(a1, a2, extobj=[32, 1, custom_error_callback]) + signature = (dpnp.float32, dpnp.float32, dpnp.float32) + dpnp.matmul(a1, a2, signature=signature) with pytest.raises(NotImplementedError): dpnp.matmul(a1, a2, axis=2) diff --git a/tests/test_product.py b/tests/test_product.py index 6c199b070d25..5fb61c81b43a 100644 --- a/tests/test_product.py +++ b/tests/test_product.py @@ -1329,3 +1329,286 @@ def test_vdot_error(self): # The second array should be of size one with pytest.raises(ValueError): dpnp.vdot(b, a) + + +@testing.with_requires("numpy>=2.0") +class TestVecdot: + def setup_method(self): + numpy.random.seed(42) + + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_complex=True) + ) + @pytest.mark.parametrize( + "shape_pair", + [ + ((4,), (4,)), # call_flag: dot + ((3, 1), (3, 1)), + ((2, 0), (2, 0)), # zero-size inputs, 1D output + ((3, 0, 4), (3, 0, 4)), # zero-size output + ((3, 4), (3, 4)), + ((1, 4), (3, 4)), + ((4,), (3, 4)), + ((3, 4), (1, 4)), + ((3, 4), (4,)), + ((1, 4, 5), (3, 1, 5)), + ((1, 1, 4, 5), (3, 1, 5)), + ], + ) + def test_basic(self, dtype, shape_pair): + shape1, shape2 = shape_pair + size1 = numpy.prod(shape1, dtype=int) + size2 = numpy.prod(shape2, dtype=int) + x1 = numpy.random.uniform(-5, 5, size1) + x2 = numpy.random.uniform(-5, 5, size2) + a = numpy.array(x1, dtype=dtype).reshape(shape1) + b = numpy.array(x2, dtype=dtype).reshape(shape2) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.vecdot(ia, ib) + expected = numpy.vecdot(a, b) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + @pytest.mark.parametrize( + "shape_pair", + [ + ((4,), (4,)), # call_flag: dot + ((3, 1), (3, 1)), + ((2, 0), (2, 0)), # zero-size inputs, 1D output + ((3, 0, 4), (3, 0, 4)), # zero-size output + ((3, 4), (3, 4)), + ((1, 4), (3, 4)), + ((4,), (3, 4)), + ((3, 4), (1, 4)), + ((3, 4), (4,)), + ((1, 4, 5), (3, 1, 5)), + ((1, 1, 4, 5), (3, 1, 5)), + ], + ) + def test_complex(self, dtype, shape_pair): + shape1, shape2 = shape_pair + size1 = numpy.prod(shape1, dtype=int) + size2 = numpy.prod(shape2, dtype=int) + x11 = numpy.random.uniform(-5, 5, size1) + x12 = numpy.random.uniform(-5, 5, size1) + x21 = numpy.random.uniform(-5, 5, size2) + x22 = numpy.random.uniform(-5, 5, size2) + a = numpy.array(x11 + 1j * x12, dtype=dtype).reshape(shape1) + b = numpy.array(x21 + 1j * x22, dtype=dtype).reshape(shape2) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.vecdot(ia, ib) + expected = numpy.vecdot(a, b) + assert_dtype_allclose(result, expected) + + 
@pytest.mark.parametrize("axis", [0, 2, -2]) + @pytest.mark.parametrize( + "shape_pair", + [((4,), (4, 4, 4)), ((3, 4, 5), (3, 4, 5))], + ) + def test_axis1(self, axis, shape_pair): + shape1, shape2 = shape_pair + size1 = numpy.prod(shape1, dtype=int) + size2 = numpy.prod(shape2, dtype=int) + a = numpy.array(numpy.random.uniform(-5, 5, size1)).reshape(shape1) + b = numpy.array(numpy.random.uniform(-5, 5, size2)).reshape(shape2) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.vecdot(ia, ib) + expected = numpy.vecdot(a, b) + assert_dtype_allclose(result, expected) + + def test_axis2(self): + # This is a special case, `a`` cannot be broadcast_to `b` + a = numpy.arange(4).reshape(1, 4) + b = numpy.arange(60).reshape(3, 4, 5) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.vecdot(ia, ib, axis=1) + expected = numpy.vecdot(a, b, axis=1) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize( + "axes", + [ + [(1,), (1,), ()], + [(0), (0), ()], + [0, 1, ()], + [-2, -1, ()], + ], + ) + def test_axes(self, axes): + a = numpy.array(numpy.random.uniform(-10, 10, 125)).reshape(5, 5, 5) + ia = dpnp.array(a) + + result = dpnp.vecdot(ia, ia, axes=axes) + expected = numpy.vecdot(a, a, axes=axes) + assert_dtype_allclose(result, expected) + + out = dpnp.empty((5, 5), dtype=ia.dtype) + result = dpnp.vecdot(ia, ia, axes=axes, out=out) + assert out is result + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize( + "axes, b_shape", + [ + ([1, 0, ()], (3,)), + ([1, 0, ()], (3, 1)), + ([1, 1, ()], (1, 3)), + ], + ) + def test_axes_out_1D(self, axes, b_shape): + a = numpy.arange(60).reshape(4, 3, 5) + b = numpy.arange(3).reshape(b_shape) + ia = dpnp.array(a) + ib = dpnp.array(b) + + out_dp = dpnp.empty((4, 5)) + out_np = numpy.empty((4, 5)) + result = dpnp.vecdot(ia, ib, axes=axes, out=out_dp) + assert result is out_dp + expected = numpy.vecdot(a, b, axes=axes, out=out_np) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("stride", [2, -1, -2]) + def test_strided(self, stride): + a = numpy.arange(100).reshape(10, 10) + b = numpy.arange(100).reshape(10, 10) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.vecdot(ia[::stride, ::stride], ib[::stride, ::stride]) + expected = numpy.vecdot(a[::stride, ::stride], b[::stride, ::stride]) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype1", get_all_dtypes()) + @pytest.mark.parametrize("dtype2", get_all_dtypes()) + def test_input_dtype_matrix(self, dtype1, dtype2): + x1 = numpy.random.uniform(-5, 5, 10) + x2 = numpy.random.uniform(-5, 5, 10) + a = numpy.array(x1, dtype=dtype1).reshape(2, 5) + b = numpy.array(x2, dtype=dtype2).reshape(2, 5) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.vecdot(ia, ib) + expected = numpy.vecdot(a, b) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("order", ["C", "F", "K", "A"]) + @pytest.mark.parametrize( + "shape", + [((4, 3)), ((4, 3, 5)), ((6, 7, 3, 5))], + ids=["((4, 3))", "((4, 3, 5))", "((6, 7, 3, 5))"], + ) + def test_order(self, order, shape): + a = numpy.arange(numpy.prod(shape)).reshape(shape) + b = dpnp.asarray(a) + + result = dpnp.vecdot(b, b, order=order) + expected = numpy.vecdot(a, a, order=order) + assert result.flags.c_contiguous == expected.flags.c_contiguous + assert result.flags.f_contiguous == expected.flags.f_contiguous + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize( + "order1, order2, out_order", + [ + ("C", "C", "C"), + ("C", "C", "F"), + ("C", 
"F", "C"), + ("C", "F", "F"), + ("F", "C", "C"), + ("F", "C", "F"), + ("F", "F", "F"), + ("F", "F", "C"), + ], + ) + def test_out(self, order1, order2, out_order): + a1 = numpy.arange(20).reshape(5, 4, order=order1) + a2 = numpy.arange(20).reshape(5, 4, order=order2) + + b1 = dpnp.asarray(a1) + b2 = dpnp.asarray(a2) + + dpnp_out = dpnp.empty(5, order=out_order) + result = dpnp.vecdot(b1, b2, out=dpnp_out) + assert result is dpnp_out + + out = numpy.empty(5, order=out_order) + expected = numpy.vecdot(a1, a2, out=out) + assert result.flags.c_contiguous == expected.flags.c_contiguous + assert result.flags.f_contiguous == expected.flags.f_contiguous + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize( + "out_shape", + [ + ((4, 5)), + ((6,)), + ((4, 7, 2)), + ], + ) + def test_out_0D(self, out_shape): + # for vecdot of 1-D arrays, output is 0-D and if out keyword is given + # NumPy repeats the data to match the shape of output array + a = numpy.arange(3) + b = dpnp.asarray(a) + + out_np = numpy.empty(out_shape) + out_dp = dpnp.empty(out_shape) + result = dpnp.vecdot(b, b, out=out_dp) + expected = numpy.vecdot(a, a, out=out_np) + assert result is out_dp + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("axis", [0, 1, 2, -1, -2, -3]) + def test_linalg_vecdot(self, axis): + x11 = numpy.random.uniform(-5, 5, 4) + x12 = numpy.random.uniform(-5, 5, 4) + x21 = numpy.random.uniform(-5, 5, 64) + x22 = numpy.random.uniform(-5, 5, 64) + a = numpy.array(x11 + 1j * x12, dtype=numpy.complex64) + b = numpy.array(x21 + 1j * x22, dtype=numpy.complex64).reshape(4, 4, 4) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.linalg.vecdot(ia, ib) + expected = numpy.linalg.vecdot(a, b) + assert_dtype_allclose(result, expected) + + def test_error(self): + a = dpnp.ones((5, 4)) + b = dpnp.ones((5, 5)) + # core dimension differs + assert_raises(ValueError, dpnp.vecdot, a, b) + + a = dpnp.ones((5, 4)) + b = dpnp.ones((3, 4)) + # input array not compatible + assert_raises(ValueError, dpnp.vecdot, a, b) + + a = dpnp.ones((3, 4)) + b = dpnp.ones((3, 4)) + c = dpnp.empty((5,)) + # out array shape is incorrect + assert_raises(ValueError, dpnp.vecdot, a, b, out=c) + + # both axes and axis cannot be specified + a = dpnp.ones((5, 5)) + assert_raises(TypeError, dpnp.vecdot, a, a, axes=[0, 0, ()], axis=-1) + + # subok keyword is not supported + assert_raises(NotImplementedError, dpnp.vecdot, a, a, subok=False) + + # signature keyword is not supported + signature = (dpnp.float32, dpnp.float32, dpnp.float32) + assert_raises( + NotImplementedError, dpnp.vecdot, a, a, signature=signature + ) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 74c65a65a59c..51221f583484 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -1013,6 +1013,35 @@ def test_matmul(device, shape_pair): assert_sycl_queue_equal(result_queue, b2.sycl_queue) +@pytest.mark.parametrize( + "device", + valid_devices, + ids=[device.filter_string for device in valid_devices], +) +@pytest.mark.parametrize( + "shape_pair", + [ + ((4,), (4,)), # call_flag: dot + ((3, 1), (3, 1)), + ((2, 0), (2, 0)), # zero-size inputs, 1D output + ((3, 0, 4), (3, 0, 4)), # zero-size output + ((3, 4), (3, 4)), # call_flag: vecdot + ], +) +def test_vecdot(device, shape_pair): + shape1, shape2 = shape_pair + a1 = numpy.arange(numpy.prod(shape1)).reshape(shape1) + a2 = numpy.arange(numpy.prod(shape2)).reshape(shape2) + + b1 = dpnp.asarray(a1, device=device) + b2 = dpnp.asarray(a2, device=device) + + result = 
dpnp.vecdot(b1, b2) + result_queue = result.sycl_queue + assert_sycl_queue_equal(result_queue, b1.sycl_queue) + assert_sycl_queue_equal(result_queue, b2.sycl_queue) + + @pytest.mark.parametrize( "func, kwargs", [ diff --git a/tests/test_umath.py b/tests/test_umath.py index 8feec65ed23c..19b5ef6b5034 100644 --- a/tests/test_umath.py +++ b/tests/test_umath.py @@ -73,11 +73,9 @@ def get_id(val): # implement missing umaths and to remove the list -# SAT-7324 vecdot # SAT-7323 bitwise_count new_umaths_numpy_20 = [ "bitwise_count", - "vecdot", ] diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 2c1fb32d2301..aeb13ae30733 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -498,6 +498,32 @@ def test_matmul(usm_type_x, usm_type_y, shape_pair): assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize( + "shape_pair", + [ + ((4,), (4,)), # call_flag: dot + ((3, 1), (3, 1)), + ((2, 0), (2, 0)), # zero-size inputs, 1D output + ((3, 0, 4), (3, 0, 4)), # zero-size output + ((3, 4), (3, 4)), # call_flag: vecdot + ], +) +def test_vecdot(usm_type_x, usm_type_y, shape_pair): + shape1, shape2 = shape_pair + x = numpy.arange(numpy.prod(shape1)).reshape(shape1) + y = numpy.arange(numpy.prod(shape2)).reshape(shape2) + + x = dp.array(x, usm_type=usm_type_x) + y = dp.array(y, usm_type=usm_type_y) + z = dp.vecdot(x, y) + + assert x.usm_type == usm_type_x + assert y.usm_type == usm_type_y + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) + + @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) @pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) def test_meshgrid(usm_type_x, usm_type_y): From dd22aae43470fb427aa0543ea4466b999be1ff28 Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Tue, 15 Oct 2024 02:00:11 -0500 Subject: [PATCH 2/3] improve coverage --- dpnp/dpnp_utils/dpnp_utils_linearalgebra.py | 77 ++++++++++----------- tests/test_mathematical.py | 19 +++-- tests/test_product.py | 24 ++++--- 3 files changed, 66 insertions(+), 54 deletions(-) diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py index 9d7459ada02c..bb7848fc4b26 100644 --- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py +++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py @@ -244,6 +244,7 @@ def _get_result_shape(x1, x2, out, func, np_flag): x1, x2, x1_ndim, x2_ndim ) else: # func == "vecdot" + assert func == "vecdot" x1, x2, result_shape = _get_result_shape_vecdot( x1, x2, x1_ndim, x2_ndim ) @@ -466,11 +467,15 @@ def _gemm_matmul(exec_q, x1, x2, res): def _shape_error(shape1, shape2, func, err_msg): + """Validate the shapes of input and output arrays.""" if func == "matmul": signature = "(n?,k),(k,m?)->(n?,m?)" - else: # func == "vecdot" + elif func == "vecdot": signature = "(n?,),(n?,)->()" + else: + # applicable when err_msg == 3 + assert func is None if err_msg == 0: raise ValueError( @@ -485,7 +490,8 @@ def _shape_error(shape1, shape2, func, err_msg): f"array has shape {shape2}. " f"These cannot be broadcast together for '{func}' function." ) - elif err_msg == 2: + else: # err_msg == 2: + assert err_msg == 2 raise ValueError( f"Expected output array of shape {shape1}, but got {shape2}." 
) @@ -557,6 +563,7 @@ def _validate_internal(axes, i, ndim): x1_ndim = x1.ndim x2_ndim = x2.ndim else: # func == "vecdot" + assert func == "vecdot" x1_ndim = x2_ndim = 1 axes[0] = _validate_internal(axes[0], 0, x1_ndim) @@ -573,6 +580,16 @@ def _validate_internal(axes, i, ndim): return axes +def _validate_out_array(out, exec_q): + """Validate out is supported array and has correct queue.""" + if out is not None: + dpnp.check_supported_arrays_type(out) + if dpctl.utils.get_execution_queue((exec_q, out.sycl_queue)) is None: + raise ExecutionPlacementError( + "Input and output allocation queues are not compatible" + ) + + def dpnp_cross(a, b, cp): """Return the cross product of two (arrays of) vectors.""" @@ -660,13 +677,7 @@ def dpnp_dot(a, b, /, out=None, *, conjugate=False): ) res_usm_type, exec_q = get_usm_allocations([a, b]) - if ( - out is not None - and dpctl.utils.get_execution_queue((exec_q, out.sycl_queue)) is None - ): - raise ExecutionPlacementError( - "Input and output allocation queues are not compatible" - ) + _validate_out_array(out, exec_q) # Determine the appropriate data types dot_dtype, res_dtype = _compute_res_dtype(a, b, sycl_queue=exec_q) @@ -755,19 +766,17 @@ def dpnp_matmul( dpnp.check_supported_arrays_type(x1, x2) res_usm_type, exec_q = get_usm_allocations([x1, x2]) - if out is not None: - dpnp.check_supported_arrays_type(out) - if dpctl.utils.get_execution_queue((exec_q, out.sycl_queue)) is None: - raise ExecutionPlacementError( - "Input and output allocation queues are not compatible" - ) + _validate_out_array(out, exec_q) - if order in ["a", "A"]: + if order in "aA": if x1.flags.fnc and x2.flags.fnc: order = "F" else: order = "C" + if order in "kK": + order = "C" + x1_ndim = x1.ndim x2_ndim = x2.ndim if axes is not None: @@ -938,6 +947,7 @@ def dpnp_matmul( result, ) else: # call_flag == "gemm_batch" + assert call_flag == "gemm_batch" result = _gemm_batch_matmul( exec_q, x1, @@ -962,14 +972,7 @@ def dpnp_matmul( result = dpnp.moveaxis(result, (-1,), axes_res) return dpnp.ascontiguousarray(result) - # If `order` was not passed as default - # we need to update it to match the passed `order`. 
- if order not in ["k", "K"]: - return dpnp.asarray(result, order=order) - # dpnp.ascontiguousarray changes 0-D array to 1-D array - if result.ndim == 0: - return result - return dpnp.ascontiguousarray(result) + return dpnp.asarray(result, order=order) result = dpnp.get_result_array(result, out, casting=casting) if axes is not None and out is result: @@ -994,14 +997,9 @@ def dpnp_vecdot( dpnp.check_supported_arrays_type(x1, x2) res_usm_type, exec_q = get_usm_allocations([x1, x2]) - if out is not None: - dpnp.check_supported_arrays_type(out) - if dpctl.utils.get_execution_queue((exec_q, out.sycl_queue)) is None: - raise ExecutionPlacementError( - "Input and output allocation queues are not compatible" - ) + _validate_out_array(out, exec_q) - if order in ["a", "A"]: + if order in "aAkK": if x1.flags.fnc and x2.flags.fnc: order = "F" else: @@ -1048,7 +1046,7 @@ def dpnp_vecdot( _, x1_is_1D, _ = _define_dim_flags(x1, axis=-1) _, x2_is_1D, _ = _define_dim_flags(x2, axis=-1) - if numpy.prod(result_shape) == 0 or x1.size == 0 or x2.size == 0: + if x1.size == 0 or x2.size == 0: order = "C" if order in "kK" else order result = _create_result_array( x1, @@ -1060,8 +1058,9 @@ def dpnp_vecdot( sycl_queue=exec_q, order=order, ) - if x1.size == 0 or x2.size == 0: - result.fill(0) + if numpy.prod(result_shape) == 0: + return result + result.fill(0) return result elif x1_is_1D and x2_is_1D: call_flag = "dot" @@ -1079,6 +1078,7 @@ def dpnp_vecdot( else: result = dpnp_dot(x1, x2, out=out, conjugate=True) else: # call_flag == "vecdot" + assert call_flag == "vecdot" x1_usm = dpnp.get_usm_ndarray(x1) x2_usm = dpnp.get_usm_ndarray(x2) result = dpnp_array._create_from_usm_ndarray( @@ -1091,13 +1091,6 @@ def dpnp_vecdot( result = dpnp.reshape(result, result_shape) if out is None: - # If `order` was not passed as default - # we need to update it to match the passed `order`. 
- if order not in "kK": - return dpnp.asarray(result, order=order) - # dpnp.ascontiguousarray changes 0-D array to 1-D array - if result.ndim == 0: - return result - return dpnp.ascontiguousarray(result) + return dpnp.asarray(result, order=order) return dpnp.get_result_array(result, out, casting=casting) diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index a3c29eac3223..3d85022459ba 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -3648,6 +3648,8 @@ def test_matmul_dtype_matrix_inputs(self, dtype1, dtype2, shape_pair): expected = numpy.matmul(a1, a2) assert_dtype_allclose(result, expected) + @pytest.mark.parametrize("order1", ["C", "F", "A"]) + @pytest.mark.parametrize("order2", ["C", "F", "A"]) @pytest.mark.parametrize("order", ["C", "F", "K", "A"]) @pytest.mark.parametrize( "shape_pair", @@ -3662,17 +3664,26 @@ def test_matmul_dtype_matrix_inputs(self, dtype1, dtype2, shape_pair): "((6, 7, 4, 3), (6, 7, 3, 5))", ], ) - def test_matmul_order(self, order, shape_pair): + def test_matmul_order(self, order1, order2, order, shape_pair): shape1, shape2 = shape_pair - a1 = numpy.arange(numpy.prod(shape1)).reshape(shape1) - a2 = numpy.arange(numpy.prod(shape2)).reshape(shape2) + a1 = numpy.arange(numpy.prod(shape1)).reshape(shape1, order=order1) + a2 = numpy.arange(numpy.prod(shape2)).reshape(shape2, order=order2) b1 = dpnp.asarray(a1) b2 = dpnp.asarray(a2) result = dpnp.matmul(b1, b2, order=order) expected = numpy.matmul(a1, a2, order=order) - assert result.flags.c_contiguous == expected.flags.c_contiguous + # For the special case of shape_pair == ((6, 7, 4, 3), (6, 7, 3, 5)) + # and order1 == "F" and order2 == "F", NumPy result is not c-contiguous + # nor f-contiguous, while dpnp (and cupy) results are c-contiguous + if not ( + shape_pair == ((6, 7, 4, 3), (6, 7, 3, 5)) + and order1 == "F" + and order2 == "F" + and order == "K" + ): + assert result.flags.c_contiguous == expected.flags.c_contiguous assert result.flags.f_contiguous == expected.flags.f_contiguous assert_dtype_allclose(result, expected) diff --git a/tests/test_product.py b/tests/test_product.py index 5fb61c81b43a..e4ac9aac1074 100644 --- a/tests/test_product.py +++ b/tests/test_product.py @@ -1343,6 +1343,7 @@ def setup_method(self): "shape_pair", [ ((4,), (4,)), # call_flag: dot + ((1, 1, 4), (1, 1, 4)), # call_flag: dot ((3, 1), (3, 1)), ((2, 0), (2, 0)), # zero-size inputs, 1D output ((3, 0, 4), (3, 0, 4)), # zero-size output @@ -1353,6 +1354,7 @@ def setup_method(self): ((3, 4), (4,)), ((1, 4, 5), (3, 1, 5)), ((1, 1, 4, 5), (3, 1, 5)), + ((1, 4, 5), (1, 3, 1, 5)), ], ) def test_basic(self, dtype, shape_pair): @@ -1375,6 +1377,7 @@ def test_basic(self, dtype, shape_pair): "shape_pair", [ ((4,), (4,)), # call_flag: dot + ((1, 1, 4), (1, 1, 4)), # call_flag: dot ((3, 1), (3, 1)), ((2, 0), (2, 0)), # zero-size inputs, 1D output ((3, 0, 4), (3, 0, 4)), # zero-size output @@ -1385,6 +1388,7 @@ def test_basic(self, dtype, shape_pair): ((3, 4), (4,)), ((1, 4, 5), (3, 1, 5)), ((1, 1, 4, 5), (3, 1, 5)), + ((1, 4, 5), (1, 3, 1, 5)), ], ) def test_complex(self, dtype, shape_pair): @@ -1501,18 +1505,22 @@ def test_input_dtype_matrix(self, dtype1, dtype2): expected = numpy.vecdot(a, b) assert_dtype_allclose(result, expected) + @pytest.mark.parametrize("order1", ["C", "F", "A"]) + @pytest.mark.parametrize("order2", ["C", "F", "A"]) @pytest.mark.parametrize("order", ["C", "F", "K", "A"]) @pytest.mark.parametrize( "shape", - [((4, 3)), ((4, 3, 5)), ((6, 7, 3, 5))], - ids=["((4, 3))", "((4, 3, 
5))", "((6, 7, 3, 5))"], + [(4, 3), (4, 3, 5), (6, 7, 3, 5)], + ids=["(4, 3)", "(4, 3, 5)", "(6, 7, 3, 5)"], ) - def test_order(self, order, shape): - a = numpy.arange(numpy.prod(shape)).reshape(shape) - b = dpnp.asarray(a) - - result = dpnp.vecdot(b, b, order=order) - expected = numpy.vecdot(a, a, order=order) + def test_order(self, order1, order2, order, shape): + a = numpy.arange(numpy.prod(shape)).reshape(shape, order=order1) + b = numpy.arange(numpy.prod(shape)).reshape(shape, order=order2) + a_dp = dpnp.asarray(a) + b_dp = dpnp.asarray(b) + + result = dpnp.vecdot(a_dp, b_dp, order=order) + expected = numpy.vecdot(a, b, order=order) assert result.flags.c_contiguous == expected.flags.c_contiguous assert result.flags.f_contiguous == expected.flags.f_contiguous assert_dtype_allclose(result, expected) From 0f53876f044353ddbd5b424e1541e28a75bdf5e4 Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Fri, 25 Oct 2024 10:18:54 -0500 Subject: [PATCH 3/3] address comments --- dpnp/dpnp_iface_linearalgebra.py | 18 +++--- dpnp/dpnp_utils/dpnp_utils_linearalgebra.py | 65 ++++++++++----------- tests/test_product.py | 51 +++++++++++++++- 3 files changed, 90 insertions(+), 44 deletions(-) diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index f6c83f4e50c6..49e1e3df8921 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -147,11 +147,11 @@ def dot(a, b, out=None): # TODO: use specific scalar-vector kernel return dpnp.multiply(a, b, out=out) + # numpy.dot does not allow casting even if it is safe + # casting="no" is used in the following if a_ndim == 1 and b_ndim == 1: - return dpnp_dot(a, b, out=out) + return dpnp_dot(a, b, out=out, casting="no") - # NumPy does not allow casting even if it is safe - # casting="no" is used in the following if a_ndim == 2 and b_ndim == 2: return dpnp.matmul(a, b, out=out, casting="no") @@ -753,6 +753,7 @@ def matmul( Type to use in computing the matrix product. By default, the returned array will have data type that is determined by considering Promotion Type Rule and device capabilities. + Default: ``None``. casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional Controls what kind of data casting may occur. Default: ``"same_kind"``. @@ -1203,7 +1204,7 @@ def vecdot( .. math:: \mathbf{a} \cdot \mathbf{b} = \sum_{i=0}^{n-1} \overline{a_i}b_i - where the sum is over the last dimension (unless axis is specified) and + where the sum is over the last dimension (unless `axis` is specified) and where :math:`\overline{a_i}` denotes the complex conjugate if :math:`a_i` is complex and the identity otherwise. @@ -1221,16 +1222,17 @@ def vecdot( removed. If not provided or ``None``, a freshly-allocated array is used. Default: ``None``. - dtype : {None, dtype}, optional - Type to use in computing the vector dot product. By default, the - returned array will have data type that is determined by considering - Promotion Type Rule and device capabilities. casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional Controls what kind of data casting may occur. Default: ``"same_kind"``. order : {"C", "F", "A", "K", None}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. + dtype : {None, dtype}, optional + Type to use in computing the vector dot product. By default, the + returned array will have data type that is determined by considering + Promotion Type Rule and device capabilities. + Default: ``None``. 
axes : {None, list of tuples}, optional A list of tuples with indices of axes the matrix product should operate on. For instance, for the signature of ``(i),(i)->()``, the base diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py index bb7848fc4b26..ac432d595911 100644 --- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py +++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py @@ -189,7 +189,7 @@ def _define_contig_flag(x): def _define_dim_flags(x, axis): """ - Define useful flags for the main calculation in dpnp_matmul. + Define useful flags for the calculations in dpnp_matmul and dpnp_vecdot. x_is_1D: `x` is 1D array or inherently 1D (all dimensions are equal to one except for one of them), for instance, if x.shape = (1, 1, 1, 2), then x_is_1D = True @@ -220,7 +220,7 @@ def _define_dim_flags(x, axis): return x_is_2D, x_is_1D, x_base_is_1D -def _get_result_shape(x1, x2, out, func, np_flag): +def _get_result_shape(x1, x2, out, _get_result_shape_fn, np_flag): """ Three task are completed in this function: - Get the shape of the result array. @@ -239,15 +239,7 @@ def _get_result_shape(x1, x2, out, func, np_flag): "The second input array does not have enough dimensions (has 0, but requires at least 1)" ) - if func == "matmul": - x1, x2, result_shape = _get_result_shape_matmul( - x1, x2, x1_ndim, x2_ndim - ) - else: # func == "vecdot" - assert func == "vecdot" - x1, x2, result_shape = _get_result_shape_vecdot( - x1, x2, x1_ndim, x2_ndim - ) + x1, x2, result_shape = _get_result_shape_fn(x1, x2, x1_ndim, x2_ndim) if out is not None: out_shape = out.shape @@ -474,7 +466,7 @@ def _shape_error(shape1, shape2, func, err_msg): elif func == "vecdot": signature = "(n?,),(n?,)->()" else: - # applicable when err_msg == 3 + # applicable when err_msg == 2 assert func is None if err_msg == 0: @@ -655,7 +647,7 @@ def dpnp_cross(a, b, cp): return cp -def dpnp_dot(a, b, /, out=None, *, conjugate=False): +def dpnp_dot(a, b, /, out=None, *, casting="same_kind", conjugate=False): """ Return the dot product of two arrays. @@ -717,8 +709,7 @@ def dpnp_dot(a, b, /, out=None, *, conjugate=False): if dot_dtype != res_dtype: result = result.astype(res_dtype, copy=False) - # numpy.dot does not allow casting even if it is safe - return dpnp.get_result_array(result, out, casting="no") + return dpnp.get_result_array(result, out, casting=casting) def dpnp_kron(a, b, a_ndim, b_ndim): @@ -773,8 +764,10 @@ def dpnp_matmul( order = "F" else: order = "C" - - if order in "kK": + elif order in "kK": + # For order="K", we return order="C" to align with NumPy behavior + # It is different than logic used in dpnp_vecdot because NumPy + # behaves differently for matmul and vecdot order = "C" x1_ndim = x1.ndim @@ -806,7 +799,7 @@ def dpnp_matmul( ) x1, x2, result_shape = _get_result_shape( - x1, x2, out, "matmul", NumPy_special_behavior + x1, x2, out, _get_result_shape_matmul, NumPy_special_behavior ) # Determine the appropriate data types @@ -1000,6 +993,9 @@ def dpnp_vecdot( _validate_out_array(out, exec_q) if order in "aAkK": + # This logic is also used for order="K" to align with NumPy behavior. 
+ # It is different than logic used in dpnp_matmul because NumPy + # behaves differently for matmul and vecdot if x1.flags.fnc and x2.flags.fnc: order = "F" else: @@ -1035,7 +1031,7 @@ def dpnp_vecdot( ) x1, x2, result_shape = _get_result_shape( - x1, x2, out, "vecdot", NumPy_special_behavior + x1, x2, out, _get_result_shape_vecdot, NumPy_special_behavior ) # Determine the appropriate data types @@ -1047,21 +1043,7 @@ def dpnp_vecdot( _, x2_is_1D, _ = _define_dim_flags(x2, axis=-1) if x1.size == 0 or x2.size == 0: - order = "C" if order in "kK" else order - result = _create_result_array( - x1, - x2, - out, - shape=result_shape, - dtype=res_dtype, - usm_type=res_usm_type, - sycl_queue=exec_q, - order=order, - ) - if numpy.prod(result_shape) == 0: - return result - result.fill(0) - return result + call_flag = "trivial" elif x1_is_1D and x2_is_1D: call_flag = "dot" # arrays are inehrently 1D, make them 1D @@ -1072,7 +1054,20 @@ def dpnp_vecdot( call_flag = "vecdot" # dispatch to proper function call - if call_flag == "dot": + if call_flag == "trivial": + result = _create_result_array( + x1, + x2, + out, + shape=result_shape, + dtype=res_dtype, + usm_type=res_usm_type, + sycl_queue=exec_q, + order=order, + ) + if numpy.prod(result_shape) != 0: + result.fill(0) + elif call_flag == "dot": if out is not None and out.shape != (): result = dpnp_dot(x1, x2, out=None, conjugate=True) else: diff --git a/tests/test_product.py b/tests/test_product.py index e4ac9aac1074..f989ba837343 100644 --- a/tests/test_product.py +++ b/tests/test_product.py @@ -1525,6 +1525,27 @@ def test_order(self, order1, order2, order, shape): assert result.flags.f_contiguous == expected.flags.f_contiguous assert_dtype_allclose(result, expected) + @pytest.mark.parametrize("order", ["C", "F", "K", "A"]) + @pytest.mark.parametrize( + "shape", [(2, 4, 0), (4, 0, 5)], ids=["(2, 4, 0)", "(4, 0, 5)"] + ) + def test_order_trivial(self, order, shape): + # input is both c-contiguous and f-contiguous + a = numpy.ones(shape) + a_dp = dpnp.asarray(a) + + result = dpnp.vecdot(a_dp, a_dp, order=order) + expected = numpy.vecdot(a, a, order=order) + if shape == (2, 4, 0) and order == "A": + # NumPy does not behave correctly for this case, for order="A", + # if input is both c- and f-contiguous, output is c-contiguous + assert result.flags.c_contiguous + assert not result.flags.f_contiguous + else: + assert result.flags.c_contiguous == expected.flags.c_contiguous + assert result.flags.f_contiguous == expected.flags.f_contiguous + assert_dtype_allclose(result, expected) + @pytest.mark.parametrize( "order1, order2, out_order", [ @@ -1538,7 +1559,7 @@ def test_order(self, order1, order2, order, shape): ("F", "F", "C"), ], ) - def test_out(self, order1, order2, out_order): + def test_out_order(self, order1, order2, out_order): a1 = numpy.arange(20).reshape(5, 4, order=order1) a2 = numpy.arange(20).reshape(5, 4, order=order2) @@ -1555,6 +1576,34 @@ def test_out(self, order1, order2, out_order): assert result.flags.f_contiguous == expected.flags.f_contiguous assert_dtype_allclose(result, expected) + @pytest.mark.parametrize("dtype1", get_all_dtypes(no_none=True)) + @pytest.mark.parametrize("dtype2", get_all_dtypes(no_none=True)) + @pytest.mark.parametrize( + "shape_pair", + [ + ((4,), ()), + ((1, 1, 4), (1, 1)), + ((6, 7, 4, 3), (6, 7, 4)), + ((2, 0), (2,)), # zero-size inputs, 1D output + ((3, 0, 4), (3, 0)), # zero-size output + ], + ) + def test_out_dtype(self, dtype1, dtype2, shape_pair): + shape1, shape2 = shape_pair + a = numpy.ones(shape1, 
dtype=dtype1) + b = dpnp.asarray(a) + + out_np = numpy.empty(shape2, dtype=dtype2) + out_dp = dpnp.asarray(out_np) + + if dpnp.can_cast(dtype1, dtype2, casting="same_kind"): + result = dpnp.vecdot(b, b, out=out_dp) + expected = numpy.vecdot(a, a, out=out_np) + assert_dtype_allclose(result, expected) + else: + with pytest.raises(TypeError): + dpnp.vecdot(b, b, out=out_dp) + @pytest.mark.parametrize( "out_shape", [
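
For reference, a minimal usage sketch of the interface introduced by this series (not part of the patch itself; it assumes a dpnp build that already contains these changes). It shows the complex conjugation of the first argument, the `axis` keyword, and the Array API compatible entry point, mirroring the docstring examples added above; exact scalar/0-d reprs may differ slightly between versions.

>>> import dpnp as np
>>> a = np.array([1+2j, 3+4j])
>>> b = np.array([5+6j, 7+8j])
>>> np.vecdot(a, b)                # conj(a) @ b, reduced over the last axis
array(70.-8.j)
>>> x = np.arange(24.).reshape(2, 3, 4)
>>> np.vecdot(x, x, axis=1).shape  # reduce over axis 1 instead of the last axis
(2, 4)
>>> np.linalg.vecdot(a, b)         # Array API compatible version, axis=-1 by default
array(70.-8.j)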