From efe804b8195cd0f6d39fe3ef50338a71ea93734a Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Mon, 7 Oct 2024 17:26:34 -0500 Subject: [PATCH 1/3] implement dpnp.vecdot and dpnp.linalg.vecdot --- doc/reference/binary.rst | 4 +- doc/reference/linalg.rst | 2 + dpnp/dpnp_iface_linearalgebra.py | 136 +++++++- dpnp/dpnp_utils/dpnp_utils_linearalgebra.py | 328 +++++++++++++++----- dpnp/linalg/dpnp_iface_linalg.py | 56 ++++ tests/test_mathematical.py | 16 +- tests/test_product.py | 283 +++++++++++++++++ tests/test_sycl_queue.py | 29 ++ tests/test_umath.py | 2 - tests/test_usm_type.py | 26 ++ 10 files changed, 772 insertions(+), 110 deletions(-) diff --git a/doc/reference/binary.rst b/doc/reference/binary.rst index 61382a6ef6d9..97f01c895dbf 100644 --- a/doc/reference/binary.rst +++ b/doc/reference/binary.rst @@ -1,5 +1,5 @@ Bit-wise operations -================= +=================== .. https://numpy.org/doc/stable/reference/routines.bitwise.html @@ -22,7 +22,6 @@ Element-wise bit operations dpnp.bitwise_right_shift dpnp.bitwise_count - Bit packing ----------- @@ -33,7 +32,6 @@ Bit packing dpnp.packbits dpnp.unpackbits - Output formatting ----------------- diff --git a/doc/reference/linalg.rst b/doc/reference/linalg.rst index 58d667f94adc..39169a508e91 100644 --- a/doc/reference/linalg.rst +++ b/doc/reference/linalg.rst @@ -87,6 +87,7 @@ Other matrix operations ----------------------- .. autosummary:: :toctree: generated/ + :nosignatures: dpnp.diagonal dpnp.linalg.diagonal (Array API compatible) @@ -96,5 +97,6 @@ Exceptions ---------- .. autosummary:: :toctree: generated/ + :nosignatures: dpnp.linalg.linAlgError diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index 924099ad2369..f6c83f4e50c6 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -48,6 +48,7 @@ dpnp_dot, dpnp_kron, dpnp_matmul, + dpnp_vecdot, ) __all__ = [ @@ -60,6 +61,7 @@ "outer", "tensordot", "vdot", + "vecdot", ] @@ -728,7 +730,6 @@ def matmul( dtype=None, subok=True, signature=None, - extobj=None, axes=None, axis=None, ): @@ -758,12 +759,12 @@ def matmul( order : {"C", "F", "A", "K", None}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. - axes : {list of tuples}, optional + axes : {None, list of tuples}, optional A list of tuples with indices of axes the matrix product should operate on. For instance, for the signature of ``(i,j),(j,k)->(i,k)``, the base elements are 2d matrices and these are taken to be stored in the two last axes of each argument. The corresponding axes keyword would be - [(-2, -1), (-2, -1), (-2, -1)]. + ``[(-2, -1), (-2, -1), (-2, -1)]``. Default: ``None``. Returns @@ -774,8 +775,8 @@ def matmul( Limitations ----------- - Keyword arguments `subok`, `signature`, `extobj`, and `axis` are - only supported with their default value. + Keyword arguments `subok`, `signature`, and `axis` are only supported with + their default values. Otherwise ``NotImplementedError`` exception will be raised. See Also @@ -834,7 +835,7 @@ def matmul( """ - if subok is False: + if not subok: raise NotImplementedError( "subok keyword argument is only supported by its default value." ) @@ -842,10 +843,6 @@ def matmul( raise NotImplementedError( "signature keyword argument is only supported by its default value." ) - if extobj is not None: - raise NotImplementedError( - "extobj keyword argument is only supported by its default value." 
- ) if axis is not None: raise NotImplementedError( "axis keyword argument is only supported by its default value." @@ -1135,6 +1132,9 @@ def vdot(a, b): -------- :obj:`dpnp.dot` : Returns the dot product. :obj:`dpnp.matmul` : Returns the matrix product. + :obj:`dpnp.vecdot` : Vector dot product of two arrays. + :obj:`dpnp.linalg.vecdot` : Array API compatible version of + :obj:`dpnp.vecdot`. Examples -------- @@ -1178,3 +1178,119 @@ def vdot(a, b): # dot product of flatten arrays return dpnp_dot(dpnp.ravel(a), dpnp.ravel(b), out=None, conjugate=True) + + +def vecdot( + x1, + x2, + /, + out=None, + *, + casting="same_kind", + order="K", + dtype=None, + subok=True, + signature=None, + axes=None, + axis=None, +): + r""" + Computes the vector dot product. + + Let :math:`\mathbf{a}` be a vector in `x1` and :math:`\mathbf{b}` be + a corresponding vector in `x2`. The dot product is defined as: + + .. math:: + \mathbf{a} \cdot \mathbf{b} = \sum_{i=0}^{n-1} \overline{a_i}b_i + + where the sum is over the last dimension (unless axis is specified) and + where :math:`\overline{a_i}` denotes the complex conjugate if :math:`a_i` + is complex and the identity otherwise. + + For full documentation refer to :obj:`numpy.vecdot`. + + Parameters + ---------- + x1 : {dpnp.ndarray, usm_ndarray} + First input array. + x2 : {dpnp.ndarray, usm_ndarray} + Second input array. + out : {None, dpnp.ndarray, usm_ndarray}, optional + A location into which the result is stored. If provided, it must have + a shape that the broadcasted shape of `x1` and `x2` with the last axis + removed. If not provided or ``None``, a freshly-allocated array is + used. + Default: ``None``. + dtype : {None, dtype}, optional + Type to use in computing the vector dot product. By default, the + returned array will have data type that is determined by considering + Promotion Type Rule and device capabilities. + casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional + Controls what kind of data casting may occur. + Default: ``"same_kind"``. + order : {"C", "F", "A", "K", None}, optional + Memory layout of the newly output array, if parameter `out` is ``None``. + Default: ``"K"``. + axes : {None, list of tuples}, optional + A list of tuples with indices of axes the matrix product should operate + on. For instance, for the signature of ``(i),(i)->()``, the base + elements are vectors and these are taken to be stored in the last axes + of each argument. The corresponding axes keyword would be + ``[(-1,), (-1), ()]``. + Default: ``None``. + axis : {None, int}, optional + Axis over which to compute the dot product. This is a short-cut for + passing in axes with entries of ``(axis,)`` for each + single-core-dimension argument and ``()`` for all others. For instance, + for a signature ``(i),(i)->()``, it is equivalent to passing in + ``axes=[(axis,), (axis,), ()]``. + Default: ``None``. + + Returns + ------- + out : dpnp.ndarray + The vector dot product of the inputs. + This is a 0-d array only when both `x1`, `x2` are 1-d vectors. + + Limitations + ----------- + Keyword arguments `subok`, and `signature` are only supported with their + default values. Otherwise ``NotImplementedError`` exception will be raised. + + See Also + -------- + :obj:`dpnp.linalg.vecdot` : Array API compatible version. + :obj:`dpnp.vdot` : Complex-conjugating dot product. + :obj:`dpnp.einsum` : Einstein summation convention. + + Examples + -------- + Get the projected size along a given normal for an array of vectors. 
+ + >>> import dpnp as np + >>> v = np.array([[0., 5., 0.], [0., 0., 10.], [0., 6., 8.]]) + >>> n = np.array([0., 0.6, 0.8]) + >>> np.vecdot(v, n) + array([ 3., 8., 10.]) + + """ + + if not subok: + raise NotImplementedError( + "subok keyword argument is only supported by its default value." + ) + if signature is not None: + raise NotImplementedError( + "signature keyword argument is only supported by its default value." + ) + + return dpnp_vecdot( + x1, + x2, + out=out, + casting=casting, + order=order, + dtype=dtype, + axes=axes, + axis=axis, + ) diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py index e15bd93d7bbd..9d7459ada02c 100644 --- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py +++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py @@ -36,7 +36,7 @@ from dpnp.dpnp_array import dpnp_array from dpnp.dpnp_utils import get_usm_allocations -__all__ = ["dpnp_cross", "dpnp_dot", "dpnp_kron", "dpnp_matmul"] +__all__ = ["dpnp_cross", "dpnp_dot", "dpnp_kron", "dpnp_matmul", "dpnp_vecdot"] def _compute_res_dtype(*arrays, sycl_queue, dtype=None, casting="no"): @@ -187,7 +187,7 @@ def _define_contig_flag(x): return flag, x_is_c_contiguous, x_is_f_contiguous -def _define_dim_flags(x, pos): +def _define_dim_flags(x, axis): """ Define useful flags for the main calculation in dpnp_matmul. x_is_1D: `x` is 1D array or inherently 1D (all dimensions are equal to one @@ -200,14 +200,14 @@ def _define_dim_flags(x, pos): if x.shape = (3, 4, 1, 2), then x_base_is_1D = True """ - index = -1 if pos == 0 else -2 x_shape = x.shape x_ndim = x.ndim x_is_1D = x_ndim == 1 if numpy.prod(x_shape) != 0: + # if the first condition is valid, the 2nd condition is not checked # the 2nd condition is only expected to be checked for - # ND-arrays (N>1) since index is in [-2, -1] - x_is_1D = x_is_1D or numpy.prod(x_shape) == x_shape[index] + # ND-arrays (N>1) since axis could be -2 + x_is_1D = x_is_1D or numpy.prod(x_shape) == x_shape[axis] x_is_2D = False if not x_is_1D: @@ -220,7 +220,7 @@ def _define_dim_flags(x, pos): return x_is_2D, x_is_1D, x_base_is_1D -def _get_result_shape(x1, x2, out, np_flag): +def _get_result_shape(x1, x2, out, func, np_flag): """ Three task are completed in this function: - Get the shape of the result array. 
@@ -230,36 +230,55 @@ def _get_result_shape(x1, x2, out, np_flag): x1_ndim = x1.ndim x2_ndim = x2.ndim - x1_shape = x1.shape - x2_shape = x2.shape - if x1_ndim == 0: raise ValueError( - "Input array 0 does not have enough dimensions (has 0, but requires at least 1)" + "The first input array does not have enough dimensions (has 0, but requires at least 1)" ) if x2_ndim == 0: raise ValueError( - "Input array 1 does not have enough dimensions (has 0, but requires at least 1)" + "The second input array does not have enough dimensions (has 0, but requires at least 1)" + ) + + if func == "matmul": + x1, x2, result_shape = _get_result_shape_matmul( + x1, x2, x1_ndim, x2_ndim ) + else: # func == "vecdot" + x1, x2, result_shape = _get_result_shape_vecdot( + x1, x2, x1_ndim, x2_ndim + ) + + if out is not None: + out_shape = out.shape + if out_shape != result_shape and not np_flag: + _shape_error(result_shape, out_shape, None, err_msg=2) + + return x1, x2, result_shape + + +def _get_result_shape_matmul(x1, x2, x1_ndim, x2_ndim): + + x1_shape = x1.shape + x2_shape = x2.shape if x1_ndim == 1 and x2_ndim == 1: if x1_shape[-1] != x2_shape[-1]: - _shape_error(x1_shape[-1], x2_shape[-1], None, err_msg=0) + _shape_error(x1_shape[-1], x2_shape[-1], "matmul", err_msg=0) result_shape = () elif x1_ndim == 1: if x1_shape[-1] != x2_shape[-2]: - _shape_error(x1_shape[-1], x2_shape[-2], None, err_msg=0) + _shape_error(x1_shape[-1], x2_shape[-2], "matmul", err_msg=0) result_shape = x2_shape[:-2] + (x2_shape[-1],) elif x2_ndim == 1: if x1_shape[-1] != x2_shape[-1]: - _shape_error(x1_shape[-1], x2_shape[-1], None, err_msg=0) + _shape_error(x1_shape[-1], x2_shape[-1], "matmul", err_msg=0) result_shape = x1_shape[:-1] else: # at least 2D - x1_is_2D, x1_is_1D, _ = _define_dim_flags(x1, pos=0) - x2_is_2D, x2_is_1D, _ = _define_dim_flags(x2, pos=1) + x1_is_2D, x1_is_1D, _ = _define_dim_flags(x1, axis=-1) + x2_is_2D, x2_is_1D, _ = _define_dim_flags(x2, axis=-2) if x1_shape[-1] != x2_shape[-2]: - _shape_error(x1_shape[-1], x2_shape[-2], None, err_msg=0) + _shape_error(x1_shape[-1], x2_shape[-2], "matmul", err_msg=0) if x1_ndim == 2 and x2_ndim == 2: result_shape = (x1_shape[-2], x2_shape[-1]) @@ -292,33 +311,56 @@ def _get_result_shape(x1, x2, out, np_flag): if not (x2_is_2D or x2_is_1D): x2 = dpnp.repeat(x2, x1_shape[i], axis=i) else: - _shape_error( - x1_shape[:-2], x2_shape[:-2], None, err_msg=2 - ) + _shape_error(x1_shape, x2_shape, "matmul", err_msg=1) result_shape = tuple(tmp_shape) + (x1.shape[-2], x2.shape[-1]) - if out is not None: - out_shape = out.shape - if out_shape != result_shape and not np_flag: - len_out = len(out_shape) - len_res = len(result_shape) - if len_out != len_res: - _shape_error(len_out, len_res, None, err_msg=4) - for i in range(len_out): - if out_shape[i] != result_shape[i]: - if i == len_out - 1: - _shape_error( - out_shape[i], result_shape[i], 1, err_msg=1 - ) - elif i == len_out - 2: - _shape_error( - out_shape[i], result_shape[i], 0, err_msg=1 - ) - else: - _shape_error( - out_shape[:-2], result_shape[:-2], None, err_msg=3 - ) + return x1, x2, result_shape + + +def _get_result_shape_vecdot(x1, x2, x1_ndim, x2_ndim): + + x1_shape = x1.shape + x2_shape = x2.shape + + if x1_shape[-1] != x2_shape[-1]: + _shape_error(x1_shape[-1], x2_shape[-1], "vecdot", err_msg=0) + + _, x1_is_1D, _ = _define_dim_flags(x1, axis=-1) + _, x2_is_1D, _ = _define_dim_flags(x2, axis=-1) + + if x1_ndim == 1 and x2_ndim == 1: + result_shape = () + elif x1_is_1D: + result_shape = x2_shape[:-1] + elif x2_is_1D: + 
result_shape = x1_shape[:-1] + else: # at least 2D + if x1_ndim != x2_ndim: + diff = abs(x1_ndim - x2_ndim) + if x1_ndim < x2_ndim: + x1 = dpnp.reshape(x1, ((1,) * diff + x1.shape)) + x1_shape = x1.shape + else: + x2 = dpnp.reshape(x2, ((1,) * diff + x2.shape)) + x2_shape = x2.shape + + # examining the option to align inputs when their + # shapes differ but the shape of one of them is 1 + # in that dimension (similar to braodcasting concept) + tmp_shape = list(x1_shape[:-1]) + for i in range(len(tmp_shape)): + if x1_shape[i] != x2_shape[i]: + if x1_shape[i] == 1: + tmp_shape[i] = x2_shape[i] + # Unlike `matmul`, no need to duplicate data for the + # dimension with shape equal to one; dpt.vecdot handles it + elif x2_shape[i] == 1: + pass + else: + _shape_error(x1_shape, x2_shape, "vecdot", err_msg=1) + + result_shape = tuple(tmp_shape) return x1, x2, result_shape @@ -423,37 +465,29 @@ def _gemm_matmul(exec_q, x1, x2, res): return res -def _shape_error(a, b, core_dim, err_msg): +def _shape_error(shape1, shape2, func, err_msg): + + if func == "matmul": + signature = "(n?,k),(k,m?)->(n?,m?)" + else: # func == "vecdot" + signature = "(n?,),(n?,)->()" + if err_msg == 0: raise ValueError( "Input arrays have a mismatch in their core dimensions. " "The core dimensions should follow this signature: " - f"(n?,k),(k,m?)->(n?,m?) (size {a} is different from {b})" + f"{signature} (size {shape1} is different from {shape2})" ) elif err_msg == 1: raise ValueError( - f"Output array has a mismatch in its core dimension {core_dim}. " - "The core dimensions should follow this signature: " - f"(n?,k),(k,m?)->(n?,m?) (size {a} is different from {b})" + "The shapes of the input arrays are incompatible. " + f"The first input array has shape {shape1} and the second input " + f"array has shape {shape2}. " + f"These cannot be broadcast together for '{func}' function." ) elif err_msg == 2: raise ValueError( - "Leading element(s) of the input arrays' shape do not match and " - "they could not be broadcast together, the leading element(s) of " - f"the input array 0 is {a} and it is different from leading " - f"element(s) of the input array 1 which is {b}." - ) - elif err_msg == 3: - raise ValueError( - "The shape of the output array does not have similar leading " - "elements to the shape of input arrays, the leading element(s) of " - f"the output array is {a} and it is different from the leading " - f"element(s) of the input arrays which is {b}." - ) - elif err_msg == 4: - raise ValueError( - "Output array does not have enough dimensions " - f"(has {a} while requires {b})" + f"Expected output array of shape {shape1}, but got {shape2}." ) @@ -479,7 +513,7 @@ def _standardize_strides_to_nonzero(strides, shape): return stndrd_strides -def _validate_axes(x1, x2, axes): +def _validate_axes(x1, x2, axes, func): """Check axes is valid for matmul function.""" def _validate_internal(axes, i, ndim): @@ -514,19 +548,24 @@ def _validate_internal(axes, i, ndim): if not isinstance(axes, list): raise TypeError("Axes should be a list.") - else: - if len(axes) != 3: - raise ValueError( - "Axes should be a list of three tuples for inputs and output." - ) + elif len(axes) != 3: + raise ValueError( + "Axes should be a list of three tuples: two inputs and one output." 
+ ) - axes[0] = _validate_internal(axes[0], 0, x1.ndim) - axes[1] = _validate_internal(axes[1], 1, x2.ndim) + if func == "matmul": + x1_ndim = x1.ndim + x2_ndim = x2.ndim + else: # func == "vecdot" + x1_ndim = x2_ndim = 1 - if x1.ndim == 1 and x2.ndim == 1: + axes[0] = _validate_internal(axes[0], 0, x1_ndim) + axes[1] = _validate_internal(axes[1], 1, x2_ndim) + + if x1_ndim == 1 and x2_ndim == 1: if axes[2] != (): raise TypeError("Axes item 2 should be an empty tuple.") - elif x1.ndim == 1 or x2.ndim == 1: + elif x1_ndim == 1 or x2_ndim == 1: axes[2] = _validate_internal(axes[2], 2, 1) else: axes[2] = _validate_internal(axes[2], 2, 2) @@ -709,10 +748,8 @@ def dpnp_matmul( """ Return the matrix product of two arrays. - The main calculation is done by calling an extension function - for BLAS library of OneMKL. Depending on dimension of `x1` and `x2` arrays, - it will be either ``gemm`` (for one- and two-dimentional arrays) or - ``gemm_batch``(for others). + The main calculation is performed by calling an extension function + for BLAS library of OneMKL. """ @@ -726,7 +763,7 @@ def dpnp_matmul( ) if order in ["a", "A"]: - if x1.flags.f_contiguous and x2.flags.f_contiguous: + if x1.flags.fnc and x2.flags.fnc: order = "F" else: order = "C" @@ -734,7 +771,7 @@ def dpnp_matmul( x1_ndim = x1.ndim x2_ndim = x2.ndim if axes is not None: - axes = _validate_axes(x1, x2, axes) + axes = _validate_axes(x1, x2, axes, "matmul") axes_x1, axes_x2, axes_res = axes axes_x1 = normalize_axis_tuple(axes_x1, x1_ndim, "axis") @@ -753,14 +790,14 @@ def dpnp_matmul( elif len(axes_res) == 1: out = dpnp.moveaxis(out, axes_res, (-1,)) - # With these conditions, the result is a 0D array. However, - # NumPy allows out to have any shape and the result is expanded to it + # When inputs are 1-D arrays, the result is a 0-D array. For this case, + # NumPy allows out keyword to have any shape and the result is broadcast to it NumPy_special_behavior = ( out is not None and x1_ndim == 1 and x2_ndim == 1 and out.shape != () ) x1, x2, result_shape = _get_result_shape( - x1, x2, out, NumPy_special_behavior + x1, x2, out, "matmul", NumPy_special_behavior ) # Determine the appropriate data types @@ -772,8 +809,8 @@ def dpnp_matmul( transpose = False x1_shape = x1.shape x2_shape = x2.shape - x1_is_2D, x1_is_1D, x1_base_is_1D = _define_dim_flags(x1, pos=0) - x2_is_2D, x2_is_1D, x2_base_is_1D = _define_dim_flags(x2, pos=1) + x1_is_2D, x1_is_1D, x1_base_is_1D = _define_dim_flags(x1, axis=-1) + x2_is_2D, x2_is_1D, x2_base_is_1D = _define_dim_flags(x2, axis=-2) # TODO: investigate usage of syrk function from BLAS in # case of a.T @ a and a @ a.T to gain performance. 
@@ -939,3 +976,128 @@ def dpnp_matmul( # out and out_orig contain the same data but they have different shape return out_orig return result + + +def dpnp_vecdot( + x1, + x2, + /, + out=None, + *, + casting="same_kind", + order="K", + dtype=None, + axes=None, + axis=None, +): + """Vector dot product of two arrays.""" + + dpnp.check_supported_arrays_type(x1, x2) + res_usm_type, exec_q = get_usm_allocations([x1, x2]) + if out is not None: + dpnp.check_supported_arrays_type(out) + if dpctl.utils.get_execution_queue((exec_q, out.sycl_queue)) is None: + raise ExecutionPlacementError( + "Input and output allocation queues are not compatible" + ) + + if order in ["a", "A"]: + if x1.flags.fnc and x2.flags.fnc: + order = "F" + else: + order = "C" + + x1_ndim = x1.ndim + x2_ndim = x2.ndim + if axes is None and axis is None: + # default behavior with axis=-1 + pass + elif axes is not None: + if axis is not None: + raise TypeError("cannot specify both `axis` and `axes`.") + + axes = _validate_axes(x1, x2, axes, "vecdot") + + axes_x1, axes_x2, axes_res = axes + axes_x1 = normalize_axis_tuple(axes_x1, x1_ndim, "axis") + axes_x2 = normalize_axis_tuple(axes_x2, x2_ndim, "axis") + + # Move the axes that are going to be used in dot product, + # to the end of "x1" and "x2" + x1 = dpnp.moveaxis(x1, axes_x1, -1) if x1_ndim != 1 else x1 + x2 = dpnp.moveaxis(x2, axes_x2, -1) if x2_ndim != 1 else x2 + else: + x1 = dpnp.moveaxis(x1, axis, -1) if x1_ndim != 1 else x1 + x2 = dpnp.moveaxis(x2, axis, -1) if x2_ndim != 1 else x2 + + # When inputs are 1-D arrays, the result is a 0-D array. For this case, + # NumPy allows out keyword to have any shape and the result is broadcast to it + NumPy_special_behavior = ( + out is not None and x1_ndim == 1 and x2_ndim == 1 and out.shape != () + ) + + x1, x2, result_shape = _get_result_shape( + x1, x2, out, "vecdot", NumPy_special_behavior + ) + + # Determine the appropriate data types + _, res_dtype = _compute_res_dtype( + x1, x2, dtype=dtype, casting=casting, sycl_queue=exec_q + ) + + _, x1_is_1D, _ = _define_dim_flags(x1, axis=-1) + _, x2_is_1D, _ = _define_dim_flags(x2, axis=-1) + + if numpy.prod(result_shape) == 0 or x1.size == 0 or x2.size == 0: + order = "C" if order in "kK" else order + result = _create_result_array( + x1, + x2, + out, + shape=result_shape, + dtype=res_dtype, + usm_type=res_usm_type, + sycl_queue=exec_q, + order=order, + ) + if x1.size == 0 or x2.size == 0: + result.fill(0) + return result + elif x1_is_1D and x2_is_1D: + call_flag = "dot" + # arrays are inehrently 1D, make them 1D + x1 = dpnp.ravel(x1) + x2 = dpnp.ravel(x2) + else: + # TODO: if a batch version of MKL dot is implemented, can be used here + call_flag = "vecdot" + + # dispatch to proper function call + if call_flag == "dot": + if out is not None and out.shape != (): + result = dpnp_dot(x1, x2, out=None, conjugate=True) + else: + result = dpnp_dot(x1, x2, out=out, conjugate=True) + else: # call_flag == "vecdot" + x1_usm = dpnp.get_usm_ndarray(x1) + x2_usm = dpnp.get_usm_ndarray(x2) + result = dpnp_array._create_from_usm_ndarray( + dpt.vecdot(x1_usm, x2_usm, axis=-1) + ) + + if NumPy_special_behavior: + result = dpnp.tile(result, out.shape) + elif result.shape != result_shape: + result = dpnp.reshape(result, result_shape) + + if out is None: + # If `order` was not passed as default + # we need to update it to match the passed `order`. 
+ if order not in "kK": + return dpnp.asarray(result, order=order) + # dpnp.ascontiguousarray changes 0-D array to 1-D array + if result.ndim == 0: + return result + return dpnp.ascontiguousarray(result) + + return dpnp.get_result_array(result, out, casting=casting) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index ce7504c18d69..9cf3eea2edec 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -95,6 +95,7 @@ "tensorinv", "tensorsolve", "trace", + "vecdot", "vector_norm", ] @@ -2161,6 +2162,61 @@ def trace(x, /, *, offset=0, dtype=None): return dpnp.trace(x, offset, axis1=-2, axis2=-1, dtype=dtype) +def vecdot(x1, x2, /, *, axis=-1): + r""" + Computes the vector dot product. + + This function is restricted to arguments compatible with the Array API, + contrary to :obj:`dpnp.vecdot`. + + Let :math:`\mathbf{a}` be a vector in `x1` and :math:`\mathbf{b}` be + a corresponding vector in `x2`. The dot product is defined as: + + .. math:: + \mathbf{a} \cdot \mathbf{b} = \sum_{i=0}^{n-1} \overline{a_i}b_i + + over the dimension specified by `axis` and where :math:`\overline{a_i}` + denotes the complex conjugate if :math:`a_i` is complex and the identity + otherwise. + + For full documentation refer to :obj:`numpy.linalg.vecdot`. + + Parameters + ---------- + x1 : {dpnp.ndarray, usm_ndarray} + First input array. + x2 : {dpnp.ndarray, usm_ndarray} + Second input array. + axis : int, optional + Axis over which to compute the dot product. + Default: ``-1``. + + Returns + ------- + out : dpnp.ndarray + The vector dot product of the inputs. + + See Also + -------- + :obj:`dpnp.vecdot` : Similar function with support for more + keyword arguments. + :obj:`dpnp.vdot` : Complex-conjugating dot product. + + Examples + -------- + Get the projected size along a given normal for an array of vectors. + + >>> import dpnp as np + >>> v = np.array([[0., 5., 0.], [0., 0., 10.], [0., 6., 8.]]) + >>> n = np.array([0., 0.6, 0.8]) + >>> np.linalg.vecdot(v, n) + array([ 3., 8., 10.]) + + """ + + return dpnp.vecdot(x1, x2, axis=axis) + + def vector_norm(x, /, *, axis=None, keepdims=False, ord=2): """ Computes the vector norm of a vector (or batch of vectors) `x`. 
diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index e511f438d3e5..a3c29eac3223 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -3929,9 +3929,8 @@ def test_matmul_out3(self, trans, dtype): ], ) def test_matmul_out_0D(self, out_shape): - # for matmul of 0-D arrays with out keyword, - # NumPy repeats the data to match the shape - # of output array + # for matmul of 1-D arrays, output is 0-D and if out keyword is given + # NumPy repeats the data to match the shape of output array a = numpy.arange(3) b = dpnp.asarray(a) @@ -4093,15 +4092,8 @@ def test_matmul_not_implemented(self): dpnp.matmul(a1, a2, subok=False) with pytest.raises(NotImplementedError): - dpnp.matmul( - a1, a2, signature=(dpnp.float32, dpnp.float32, dpnp.float32) - ) - - def custom_error_callback(err): - print("Custom error callback triggered with error:", err) - - with pytest.raises(NotImplementedError): - dpnp.matmul(a1, a2, extobj=[32, 1, custom_error_callback]) + signature = (dpnp.float32, dpnp.float32, dpnp.float32) + dpnp.matmul(a1, a2, signature=signature) with pytest.raises(NotImplementedError): dpnp.matmul(a1, a2, axis=2) diff --git a/tests/test_product.py b/tests/test_product.py index 6c199b070d25..5fb61c81b43a 100644 --- a/tests/test_product.py +++ b/tests/test_product.py @@ -1329,3 +1329,286 @@ def test_vdot_error(self): # The second array should be of size one with pytest.raises(ValueError): dpnp.vdot(b, a) + + +@testing.with_requires("numpy>=2.0") +class TestVecdot: + def setup_method(self): + numpy.random.seed(42) + + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_complex=True) + ) + @pytest.mark.parametrize( + "shape_pair", + [ + ((4,), (4,)), # call_flag: dot + ((3, 1), (3, 1)), + ((2, 0), (2, 0)), # zero-size inputs, 1D output + ((3, 0, 4), (3, 0, 4)), # zero-size output + ((3, 4), (3, 4)), + ((1, 4), (3, 4)), + ((4,), (3, 4)), + ((3, 4), (1, 4)), + ((3, 4), (4,)), + ((1, 4, 5), (3, 1, 5)), + ((1, 1, 4, 5), (3, 1, 5)), + ], + ) + def test_basic(self, dtype, shape_pair): + shape1, shape2 = shape_pair + size1 = numpy.prod(shape1, dtype=int) + size2 = numpy.prod(shape2, dtype=int) + x1 = numpy.random.uniform(-5, 5, size1) + x2 = numpy.random.uniform(-5, 5, size2) + a = numpy.array(x1, dtype=dtype).reshape(shape1) + b = numpy.array(x2, dtype=dtype).reshape(shape2) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.vecdot(ia, ib) + expected = numpy.vecdot(a, b) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + @pytest.mark.parametrize( + "shape_pair", + [ + ((4,), (4,)), # call_flag: dot + ((3, 1), (3, 1)), + ((2, 0), (2, 0)), # zero-size inputs, 1D output + ((3, 0, 4), (3, 0, 4)), # zero-size output + ((3, 4), (3, 4)), + ((1, 4), (3, 4)), + ((4,), (3, 4)), + ((3, 4), (1, 4)), + ((3, 4), (4,)), + ((1, 4, 5), (3, 1, 5)), + ((1, 1, 4, 5), (3, 1, 5)), + ], + ) + def test_complex(self, dtype, shape_pair): + shape1, shape2 = shape_pair + size1 = numpy.prod(shape1, dtype=int) + size2 = numpy.prod(shape2, dtype=int) + x11 = numpy.random.uniform(-5, 5, size1) + x12 = numpy.random.uniform(-5, 5, size1) + x21 = numpy.random.uniform(-5, 5, size2) + x22 = numpy.random.uniform(-5, 5, size2) + a = numpy.array(x11 + 1j * x12, dtype=dtype).reshape(shape1) + b = numpy.array(x21 + 1j * x22, dtype=dtype).reshape(shape2) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.vecdot(ia, ib) + expected = numpy.vecdot(a, b) + assert_dtype_allclose(result, expected) + + 
@pytest.mark.parametrize("axis", [0, 2, -2]) + @pytest.mark.parametrize( + "shape_pair", + [((4,), (4, 4, 4)), ((3, 4, 5), (3, 4, 5))], + ) + def test_axis1(self, axis, shape_pair): + shape1, shape2 = shape_pair + size1 = numpy.prod(shape1, dtype=int) + size2 = numpy.prod(shape2, dtype=int) + a = numpy.array(numpy.random.uniform(-5, 5, size1)).reshape(shape1) + b = numpy.array(numpy.random.uniform(-5, 5, size2)).reshape(shape2) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.vecdot(ia, ib) + expected = numpy.vecdot(a, b) + assert_dtype_allclose(result, expected) + + def test_axis2(self): + # This is a special case, `a`` cannot be broadcast_to `b` + a = numpy.arange(4).reshape(1, 4) + b = numpy.arange(60).reshape(3, 4, 5) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.vecdot(ia, ib, axis=1) + expected = numpy.vecdot(a, b, axis=1) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize( + "axes", + [ + [(1,), (1,), ()], + [(0), (0), ()], + [0, 1, ()], + [-2, -1, ()], + ], + ) + def test_axes(self, axes): + a = numpy.array(numpy.random.uniform(-10, 10, 125)).reshape(5, 5, 5) + ia = dpnp.array(a) + + result = dpnp.vecdot(ia, ia, axes=axes) + expected = numpy.vecdot(a, a, axes=axes) + assert_dtype_allclose(result, expected) + + out = dpnp.empty((5, 5), dtype=ia.dtype) + result = dpnp.vecdot(ia, ia, axes=axes, out=out) + assert out is result + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize( + "axes, b_shape", + [ + ([1, 0, ()], (3,)), + ([1, 0, ()], (3, 1)), + ([1, 1, ()], (1, 3)), + ], + ) + def test_axes_out_1D(self, axes, b_shape): + a = numpy.arange(60).reshape(4, 3, 5) + b = numpy.arange(3).reshape(b_shape) + ia = dpnp.array(a) + ib = dpnp.array(b) + + out_dp = dpnp.empty((4, 5)) + out_np = numpy.empty((4, 5)) + result = dpnp.vecdot(ia, ib, axes=axes, out=out_dp) + assert result is out_dp + expected = numpy.vecdot(a, b, axes=axes, out=out_np) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("stride", [2, -1, -2]) + def test_strided(self, stride): + a = numpy.arange(100).reshape(10, 10) + b = numpy.arange(100).reshape(10, 10) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.vecdot(ia[::stride, ::stride], ib[::stride, ::stride]) + expected = numpy.vecdot(a[::stride, ::stride], b[::stride, ::stride]) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype1", get_all_dtypes()) + @pytest.mark.parametrize("dtype2", get_all_dtypes()) + def test_input_dtype_matrix(self, dtype1, dtype2): + x1 = numpy.random.uniform(-5, 5, 10) + x2 = numpy.random.uniform(-5, 5, 10) + a = numpy.array(x1, dtype=dtype1).reshape(2, 5) + b = numpy.array(x2, dtype=dtype2).reshape(2, 5) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.vecdot(ia, ib) + expected = numpy.vecdot(a, b) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("order", ["C", "F", "K", "A"]) + @pytest.mark.parametrize( + "shape", + [((4, 3)), ((4, 3, 5)), ((6, 7, 3, 5))], + ids=["((4, 3))", "((4, 3, 5))", "((6, 7, 3, 5))"], + ) + def test_order(self, order, shape): + a = numpy.arange(numpy.prod(shape)).reshape(shape) + b = dpnp.asarray(a) + + result = dpnp.vecdot(b, b, order=order) + expected = numpy.vecdot(a, a, order=order) + assert result.flags.c_contiguous == expected.flags.c_contiguous + assert result.flags.f_contiguous == expected.flags.f_contiguous + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize( + "order1, order2, out_order", + [ + ("C", "C", "C"), + ("C", "C", "F"), + ("C", 
"F", "C"), + ("C", "F", "F"), + ("F", "C", "C"), + ("F", "C", "F"), + ("F", "F", "F"), + ("F", "F", "C"), + ], + ) + def test_out(self, order1, order2, out_order): + a1 = numpy.arange(20).reshape(5, 4, order=order1) + a2 = numpy.arange(20).reshape(5, 4, order=order2) + + b1 = dpnp.asarray(a1) + b2 = dpnp.asarray(a2) + + dpnp_out = dpnp.empty(5, order=out_order) + result = dpnp.vecdot(b1, b2, out=dpnp_out) + assert result is dpnp_out + + out = numpy.empty(5, order=out_order) + expected = numpy.vecdot(a1, a2, out=out) + assert result.flags.c_contiguous == expected.flags.c_contiguous + assert result.flags.f_contiguous == expected.flags.f_contiguous + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize( + "out_shape", + [ + ((4, 5)), + ((6,)), + ((4, 7, 2)), + ], + ) + def test_out_0D(self, out_shape): + # for vecdot of 1-D arrays, output is 0-D and if out keyword is given + # NumPy repeats the data to match the shape of output array + a = numpy.arange(3) + b = dpnp.asarray(a) + + out_np = numpy.empty(out_shape) + out_dp = dpnp.empty(out_shape) + result = dpnp.vecdot(b, b, out=out_dp) + expected = numpy.vecdot(a, a, out=out_np) + assert result is out_dp + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("axis", [0, 1, 2, -1, -2, -3]) + def test_linalg_vecdot(self, axis): + x11 = numpy.random.uniform(-5, 5, 4) + x12 = numpy.random.uniform(-5, 5, 4) + x21 = numpy.random.uniform(-5, 5, 64) + x22 = numpy.random.uniform(-5, 5, 64) + a = numpy.array(x11 + 1j * x12, dtype=numpy.complex64) + b = numpy.array(x21 + 1j * x22, dtype=numpy.complex64).reshape(4, 4, 4) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.linalg.vecdot(ia, ib) + expected = numpy.linalg.vecdot(a, b) + assert_dtype_allclose(result, expected) + + def test_error(self): + a = dpnp.ones((5, 4)) + b = dpnp.ones((5, 5)) + # core dimension differs + assert_raises(ValueError, dpnp.vecdot, a, b) + + a = dpnp.ones((5, 4)) + b = dpnp.ones((3, 4)) + # input array not compatible + assert_raises(ValueError, dpnp.vecdot, a, b) + + a = dpnp.ones((3, 4)) + b = dpnp.ones((3, 4)) + c = dpnp.empty((5,)) + # out array shape is incorrect + assert_raises(ValueError, dpnp.vecdot, a, b, out=c) + + # both axes and axis cannot be specified + a = dpnp.ones((5, 5)) + assert_raises(TypeError, dpnp.vecdot, a, a, axes=[0, 0, ()], axis=-1) + + # subok keyword is not supported + assert_raises(NotImplementedError, dpnp.vecdot, a, a, subok=False) + + # signature keyword is not supported + signature = (dpnp.float32, dpnp.float32, dpnp.float32) + assert_raises( + NotImplementedError, dpnp.vecdot, a, a, signature=signature + ) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 74c65a65a59c..51221f583484 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -1013,6 +1013,35 @@ def test_matmul(device, shape_pair): assert_sycl_queue_equal(result_queue, b2.sycl_queue) +@pytest.mark.parametrize( + "device", + valid_devices, + ids=[device.filter_string for device in valid_devices], +) +@pytest.mark.parametrize( + "shape_pair", + [ + ((4,), (4,)), # call_flag: dot + ((3, 1), (3, 1)), + ((2, 0), (2, 0)), # zero-size inputs, 1D output + ((3, 0, 4), (3, 0, 4)), # zero-size output + ((3, 4), (3, 4)), # call_flag: vecdot + ], +) +def test_vecdot(device, shape_pair): + shape1, shape2 = shape_pair + a1 = numpy.arange(numpy.prod(shape1)).reshape(shape1) + a2 = numpy.arange(numpy.prod(shape2)).reshape(shape2) + + b1 = dpnp.asarray(a1, device=device) + b2 = dpnp.asarray(a2, device=device) + + result = 
dpnp.vecdot(b1, b2) + result_queue = result.sycl_queue + assert_sycl_queue_equal(result_queue, b1.sycl_queue) + assert_sycl_queue_equal(result_queue, b2.sycl_queue) + + @pytest.mark.parametrize( "func, kwargs", [ diff --git a/tests/test_umath.py b/tests/test_umath.py index 8feec65ed23c..19b5ef6b5034 100644 --- a/tests/test_umath.py +++ b/tests/test_umath.py @@ -73,11 +73,9 @@ def get_id(val): # implement missing umaths and to remove the list -# SAT-7324 vecdot # SAT-7323 bitwise_count new_umaths_numpy_20 = [ "bitwise_count", - "vecdot", ] diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 2c1fb32d2301..aeb13ae30733 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -498,6 +498,32 @@ def test_matmul(usm_type_x, usm_type_y, shape_pair): assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize( + "shape_pair", + [ + ((4,), (4,)), # call_flag: dot + ((3, 1), (3, 1)), + ((2, 0), (2, 0)), # zero-size inputs, 1D output + ((3, 0, 4), (3, 0, 4)), # zero-size output + ((3, 4), (3, 4)), # call_flag: vecdot + ], +) +def test_vecdot(usm_type_x, usm_type_y, shape_pair): + shape1, shape2 = shape_pair + x = numpy.arange(numpy.prod(shape1)).reshape(shape1) + y = numpy.arange(numpy.prod(shape2)).reshape(shape2) + + x = dp.array(x, usm_type=usm_type_x) + y = dp.array(y, usm_type=usm_type_y) + z = dp.vecdot(x, y) + + assert x.usm_type == usm_type_x + assert y.usm_type == usm_type_y + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) + + @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) @pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) def test_meshgrid(usm_type_x, usm_type_y): From dd22aae43470fb427aa0543ea4466b999be1ff28 Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Tue, 15 Oct 2024 02:00:11 -0500 Subject: [PATCH 2/3] improve coverage --- dpnp/dpnp_utils/dpnp_utils_linearalgebra.py | 77 ++++++++++----------- tests/test_mathematical.py | 19 +++-- tests/test_product.py | 24 ++++--- 3 files changed, 66 insertions(+), 54 deletions(-) diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py index 9d7459ada02c..bb7848fc4b26 100644 --- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py +++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py @@ -244,6 +244,7 @@ def _get_result_shape(x1, x2, out, func, np_flag): x1, x2, x1_ndim, x2_ndim ) else: # func == "vecdot" + assert func == "vecdot" x1, x2, result_shape = _get_result_shape_vecdot( x1, x2, x1_ndim, x2_ndim ) @@ -466,11 +467,15 @@ def _gemm_matmul(exec_q, x1, x2, res): def _shape_error(shape1, shape2, func, err_msg): + """Validate the shapes of input and output arrays.""" if func == "matmul": signature = "(n?,k),(k,m?)->(n?,m?)" - else: # func == "vecdot" + elif func == "vecdot": signature = "(n?,),(n?,)->()" + else: + # applicable when err_msg == 3 + assert func is None if err_msg == 0: raise ValueError( @@ -485,7 +490,8 @@ def _shape_error(shape1, shape2, func, err_msg): f"array has shape {shape2}. " f"These cannot be broadcast together for '{func}' function." ) - elif err_msg == 2: + else: # err_msg == 2: + assert err_msg == 2 raise ValueError( f"Expected output array of shape {shape1}, but got {shape2}." 
) @@ -557,6 +563,7 @@ def _validate_internal(axes, i, ndim): x1_ndim = x1.ndim x2_ndim = x2.ndim else: # func == "vecdot" + assert func == "vecdot" x1_ndim = x2_ndim = 1 axes[0] = _validate_internal(axes[0], 0, x1_ndim) @@ -573,6 +580,16 @@ def _validate_internal(axes, i, ndim): return axes +def _validate_out_array(out, exec_q): + """Validate out is supported array and has correct queue.""" + if out is not None: + dpnp.check_supported_arrays_type(out) + if dpctl.utils.get_execution_queue((exec_q, out.sycl_queue)) is None: + raise ExecutionPlacementError( + "Input and output allocation queues are not compatible" + ) + + def dpnp_cross(a, b, cp): """Return the cross product of two (arrays of) vectors.""" @@ -660,13 +677,7 @@ def dpnp_dot(a, b, /, out=None, *, conjugate=False): ) res_usm_type, exec_q = get_usm_allocations([a, b]) - if ( - out is not None - and dpctl.utils.get_execution_queue((exec_q, out.sycl_queue)) is None - ): - raise ExecutionPlacementError( - "Input and output allocation queues are not compatible" - ) + _validate_out_array(out, exec_q) # Determine the appropriate data types dot_dtype, res_dtype = _compute_res_dtype(a, b, sycl_queue=exec_q) @@ -755,19 +766,17 @@ def dpnp_matmul( dpnp.check_supported_arrays_type(x1, x2) res_usm_type, exec_q = get_usm_allocations([x1, x2]) - if out is not None: - dpnp.check_supported_arrays_type(out) - if dpctl.utils.get_execution_queue((exec_q, out.sycl_queue)) is None: - raise ExecutionPlacementError( - "Input and output allocation queues are not compatible" - ) + _validate_out_array(out, exec_q) - if order in ["a", "A"]: + if order in "aA": if x1.flags.fnc and x2.flags.fnc: order = "F" else: order = "C" + if order in "kK": + order = "C" + x1_ndim = x1.ndim x2_ndim = x2.ndim if axes is not None: @@ -938,6 +947,7 @@ def dpnp_matmul( result, ) else: # call_flag == "gemm_batch" + assert call_flag == "gemm_batch" result = _gemm_batch_matmul( exec_q, x1, @@ -962,14 +972,7 @@ def dpnp_matmul( result = dpnp.moveaxis(result, (-1,), axes_res) return dpnp.ascontiguousarray(result) - # If `order` was not passed as default - # we need to update it to match the passed `order`. 
- if order not in ["k", "K"]: - return dpnp.asarray(result, order=order) - # dpnp.ascontiguousarray changes 0-D array to 1-D array - if result.ndim == 0: - return result - return dpnp.ascontiguousarray(result) + return dpnp.asarray(result, order=order) result = dpnp.get_result_array(result, out, casting=casting) if axes is not None and out is result: @@ -994,14 +997,9 @@ def dpnp_vecdot( dpnp.check_supported_arrays_type(x1, x2) res_usm_type, exec_q = get_usm_allocations([x1, x2]) - if out is not None: - dpnp.check_supported_arrays_type(out) - if dpctl.utils.get_execution_queue((exec_q, out.sycl_queue)) is None: - raise ExecutionPlacementError( - "Input and output allocation queues are not compatible" - ) + _validate_out_array(out, exec_q) - if order in ["a", "A"]: + if order in "aAkK": if x1.flags.fnc and x2.flags.fnc: order = "F" else: @@ -1048,7 +1046,7 @@ def dpnp_vecdot( _, x1_is_1D, _ = _define_dim_flags(x1, axis=-1) _, x2_is_1D, _ = _define_dim_flags(x2, axis=-1) - if numpy.prod(result_shape) == 0 or x1.size == 0 or x2.size == 0: + if x1.size == 0 or x2.size == 0: order = "C" if order in "kK" else order result = _create_result_array( x1, @@ -1060,8 +1058,9 @@ def dpnp_vecdot( sycl_queue=exec_q, order=order, ) - if x1.size == 0 or x2.size == 0: - result.fill(0) + if numpy.prod(result_shape) == 0: + return result + result.fill(0) return result elif x1_is_1D and x2_is_1D: call_flag = "dot" @@ -1079,6 +1078,7 @@ def dpnp_vecdot( else: result = dpnp_dot(x1, x2, out=out, conjugate=True) else: # call_flag == "vecdot" + assert call_flag == "vecdot" x1_usm = dpnp.get_usm_ndarray(x1) x2_usm = dpnp.get_usm_ndarray(x2) result = dpnp_array._create_from_usm_ndarray( @@ -1091,13 +1091,6 @@ def dpnp_vecdot( result = dpnp.reshape(result, result_shape) if out is None: - # If `order` was not passed as default - # we need to update it to match the passed `order`. 
- if order not in "kK": - return dpnp.asarray(result, order=order) - # dpnp.ascontiguousarray changes 0-D array to 1-D array - if result.ndim == 0: - return result - return dpnp.ascontiguousarray(result) + return dpnp.asarray(result, order=order) return dpnp.get_result_array(result, out, casting=casting) diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index a3c29eac3223..3d85022459ba 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -3648,6 +3648,8 @@ def test_matmul_dtype_matrix_inputs(self, dtype1, dtype2, shape_pair): expected = numpy.matmul(a1, a2) assert_dtype_allclose(result, expected) + @pytest.mark.parametrize("order1", ["C", "F", "A"]) + @pytest.mark.parametrize("order2", ["C", "F", "A"]) @pytest.mark.parametrize("order", ["C", "F", "K", "A"]) @pytest.mark.parametrize( "shape_pair", @@ -3662,17 +3664,26 @@ def test_matmul_dtype_matrix_inputs(self, dtype1, dtype2, shape_pair): "((6, 7, 4, 3), (6, 7, 3, 5))", ], ) - def test_matmul_order(self, order, shape_pair): + def test_matmul_order(self, order1, order2, order, shape_pair): shape1, shape2 = shape_pair - a1 = numpy.arange(numpy.prod(shape1)).reshape(shape1) - a2 = numpy.arange(numpy.prod(shape2)).reshape(shape2) + a1 = numpy.arange(numpy.prod(shape1)).reshape(shape1, order=order1) + a2 = numpy.arange(numpy.prod(shape2)).reshape(shape2, order=order2) b1 = dpnp.asarray(a1) b2 = dpnp.asarray(a2) result = dpnp.matmul(b1, b2, order=order) expected = numpy.matmul(a1, a2, order=order) - assert result.flags.c_contiguous == expected.flags.c_contiguous + # For the special case of shape_pair == ((6, 7, 4, 3), (6, 7, 3, 5)) + # and order1 == "F" and order2 == "F", NumPy result is not c-contiguous + # nor f-contiguous, while dpnp (and cupy) results are c-contiguous + if not ( + shape_pair == ((6, 7, 4, 3), (6, 7, 3, 5)) + and order1 == "F" + and order2 == "F" + and order == "K" + ): + assert result.flags.c_contiguous == expected.flags.c_contiguous assert result.flags.f_contiguous == expected.flags.f_contiguous assert_dtype_allclose(result, expected) diff --git a/tests/test_product.py b/tests/test_product.py index 5fb61c81b43a..e4ac9aac1074 100644 --- a/tests/test_product.py +++ b/tests/test_product.py @@ -1343,6 +1343,7 @@ def setup_method(self): "shape_pair", [ ((4,), (4,)), # call_flag: dot + ((1, 1, 4), (1, 1, 4)), # call_flag: dot ((3, 1), (3, 1)), ((2, 0), (2, 0)), # zero-size inputs, 1D output ((3, 0, 4), (3, 0, 4)), # zero-size output @@ -1353,6 +1354,7 @@ def setup_method(self): ((3, 4), (4,)), ((1, 4, 5), (3, 1, 5)), ((1, 1, 4, 5), (3, 1, 5)), + ((1, 4, 5), (1, 3, 1, 5)), ], ) def test_basic(self, dtype, shape_pair): @@ -1375,6 +1377,7 @@ def test_basic(self, dtype, shape_pair): "shape_pair", [ ((4,), (4,)), # call_flag: dot + ((1, 1, 4), (1, 1, 4)), # call_flag: dot ((3, 1), (3, 1)), ((2, 0), (2, 0)), # zero-size inputs, 1D output ((3, 0, 4), (3, 0, 4)), # zero-size output @@ -1385,6 +1388,7 @@ def test_basic(self, dtype, shape_pair): ((3, 4), (4,)), ((1, 4, 5), (3, 1, 5)), ((1, 1, 4, 5), (3, 1, 5)), + ((1, 4, 5), (1, 3, 1, 5)), ], ) def test_complex(self, dtype, shape_pair): @@ -1501,18 +1505,22 @@ def test_input_dtype_matrix(self, dtype1, dtype2): expected = numpy.vecdot(a, b) assert_dtype_allclose(result, expected) + @pytest.mark.parametrize("order1", ["C", "F", "A"]) + @pytest.mark.parametrize("order2", ["C", "F", "A"]) @pytest.mark.parametrize("order", ["C", "F", "K", "A"]) @pytest.mark.parametrize( "shape", - [((4, 3)), ((4, 3, 5)), ((6, 7, 3, 5))], - ids=["((4, 3))", "((4, 3, 
5))", "((6, 7, 3, 5))"], + [(4, 3), (4, 3, 5), (6, 7, 3, 5)], + ids=["(4, 3)", "(4, 3, 5)", "(6, 7, 3, 5)"], ) - def test_order(self, order, shape): - a = numpy.arange(numpy.prod(shape)).reshape(shape) - b = dpnp.asarray(a) - - result = dpnp.vecdot(b, b, order=order) - expected = numpy.vecdot(a, a, order=order) + def test_order(self, order1, order2, order, shape): + a = numpy.arange(numpy.prod(shape)).reshape(shape, order=order1) + b = numpy.arange(numpy.prod(shape)).reshape(shape, order=order2) + a_dp = dpnp.asarray(a) + b_dp = dpnp.asarray(b) + + result = dpnp.vecdot(a_dp, b_dp, order=order) + expected = numpy.vecdot(a, b, order=order) assert result.flags.c_contiguous == expected.flags.c_contiguous assert result.flags.f_contiguous == expected.flags.f_contiguous assert_dtype_allclose(result, expected) From 0f53876f044353ddbd5b424e1541e28a75bdf5e4 Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Fri, 25 Oct 2024 10:18:54 -0500 Subject: [PATCH 3/3] address comments --- dpnp/dpnp_iface_linearalgebra.py | 18 +++--- dpnp/dpnp_utils/dpnp_utils_linearalgebra.py | 65 ++++++++++----------- tests/test_product.py | 51 +++++++++++++++- 3 files changed, 90 insertions(+), 44 deletions(-) diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index f6c83f4e50c6..49e1e3df8921 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -147,11 +147,11 @@ def dot(a, b, out=None): # TODO: use specific scalar-vector kernel return dpnp.multiply(a, b, out=out) + # numpy.dot does not allow casting even if it is safe + # casting="no" is used in the following if a_ndim == 1 and b_ndim == 1: - return dpnp_dot(a, b, out=out) + return dpnp_dot(a, b, out=out, casting="no") - # NumPy does not allow casting even if it is safe - # casting="no" is used in the following if a_ndim == 2 and b_ndim == 2: return dpnp.matmul(a, b, out=out, casting="no") @@ -753,6 +753,7 @@ def matmul( Type to use in computing the matrix product. By default, the returned array will have data type that is determined by considering Promotion Type Rule and device capabilities. + Default: ``None``. casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional Controls what kind of data casting may occur. Default: ``"same_kind"``. @@ -1203,7 +1204,7 @@ def vecdot( .. math:: \mathbf{a} \cdot \mathbf{b} = \sum_{i=0}^{n-1} \overline{a_i}b_i - where the sum is over the last dimension (unless axis is specified) and + where the sum is over the last dimension (unless `axis` is specified) and where :math:`\overline{a_i}` denotes the complex conjugate if :math:`a_i` is complex and the identity otherwise. @@ -1221,16 +1222,17 @@ def vecdot( removed. If not provided or ``None``, a freshly-allocated array is used. Default: ``None``. - dtype : {None, dtype}, optional - Type to use in computing the vector dot product. By default, the - returned array will have data type that is determined by considering - Promotion Type Rule and device capabilities. casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional Controls what kind of data casting may occur. Default: ``"same_kind"``. order : {"C", "F", "A", "K", None}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. + dtype : {None, dtype}, optional + Type to use in computing the vector dot product. By default, the + returned array will have data type that is determined by considering + Promotion Type Rule and device capabilities. + Default: ``None``. 
axes : {None, list of tuples}, optional A list of tuples with indices of axes the matrix product should operate on. For instance, for the signature of ``(i),(i)->()``, the base diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py index bb7848fc4b26..ac432d595911 100644 --- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py +++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py @@ -189,7 +189,7 @@ def _define_contig_flag(x): def _define_dim_flags(x, axis): """ - Define useful flags for the main calculation in dpnp_matmul. + Define useful flags for the calculations in dpnp_matmul and dpnp_vecdot. x_is_1D: `x` is 1D array or inherently 1D (all dimensions are equal to one except for one of them), for instance, if x.shape = (1, 1, 1, 2), then x_is_1D = True @@ -220,7 +220,7 @@ def _define_dim_flags(x, axis): return x_is_2D, x_is_1D, x_base_is_1D -def _get_result_shape(x1, x2, out, func, np_flag): +def _get_result_shape(x1, x2, out, _get_result_shape_fn, np_flag): """ Three task are completed in this function: - Get the shape of the result array. @@ -239,15 +239,7 @@ def _get_result_shape(x1, x2, out, func, np_flag): "The second input array does not have enough dimensions (has 0, but requires at least 1)" ) - if func == "matmul": - x1, x2, result_shape = _get_result_shape_matmul( - x1, x2, x1_ndim, x2_ndim - ) - else: # func == "vecdot" - assert func == "vecdot" - x1, x2, result_shape = _get_result_shape_vecdot( - x1, x2, x1_ndim, x2_ndim - ) + x1, x2, result_shape = _get_result_shape_fn(x1, x2, x1_ndim, x2_ndim) if out is not None: out_shape = out.shape @@ -474,7 +466,7 @@ def _shape_error(shape1, shape2, func, err_msg): elif func == "vecdot": signature = "(n?,),(n?,)->()" else: - # applicable when err_msg == 3 + # applicable when err_msg == 2 assert func is None if err_msg == 0: @@ -655,7 +647,7 @@ def dpnp_cross(a, b, cp): return cp -def dpnp_dot(a, b, /, out=None, *, conjugate=False): +def dpnp_dot(a, b, /, out=None, *, casting="same_kind", conjugate=False): """ Return the dot product of two arrays. @@ -717,8 +709,7 @@ def dpnp_dot(a, b, /, out=None, *, conjugate=False): if dot_dtype != res_dtype: result = result.astype(res_dtype, copy=False) - # numpy.dot does not allow casting even if it is safe - return dpnp.get_result_array(result, out, casting="no") + return dpnp.get_result_array(result, out, casting=casting) def dpnp_kron(a, b, a_ndim, b_ndim): @@ -773,8 +764,10 @@ def dpnp_matmul( order = "F" else: order = "C" - - if order in "kK": + elif order in "kK": + # For order="K", we return order="C" to align with NumPy behavior + # It is different than logic used in dpnp_vecdot because NumPy + # behaves differently for matmul and vecdot order = "C" x1_ndim = x1.ndim @@ -806,7 +799,7 @@ def dpnp_matmul( ) x1, x2, result_shape = _get_result_shape( - x1, x2, out, "matmul", NumPy_special_behavior + x1, x2, out, _get_result_shape_matmul, NumPy_special_behavior ) # Determine the appropriate data types @@ -1000,6 +993,9 @@ def dpnp_vecdot( _validate_out_array(out, exec_q) if order in "aAkK": + # This logic is also used for order="K" to align with NumPy behavior. 
+ # It is different than logic used in dpnp_matmul because NumPy + # behaves differently for matmul and vecdot if x1.flags.fnc and x2.flags.fnc: order = "F" else: @@ -1035,7 +1031,7 @@ def dpnp_vecdot( ) x1, x2, result_shape = _get_result_shape( - x1, x2, out, "vecdot", NumPy_special_behavior + x1, x2, out, _get_result_shape_vecdot, NumPy_special_behavior ) # Determine the appropriate data types @@ -1047,21 +1043,7 @@ def dpnp_vecdot( _, x2_is_1D, _ = _define_dim_flags(x2, axis=-1) if x1.size == 0 or x2.size == 0: - order = "C" if order in "kK" else order - result = _create_result_array( - x1, - x2, - out, - shape=result_shape, - dtype=res_dtype, - usm_type=res_usm_type, - sycl_queue=exec_q, - order=order, - ) - if numpy.prod(result_shape) == 0: - return result - result.fill(0) - return result + call_flag = "trivial" elif x1_is_1D and x2_is_1D: call_flag = "dot" # arrays are inehrently 1D, make them 1D @@ -1072,7 +1054,20 @@ def dpnp_vecdot( call_flag = "vecdot" # dispatch to proper function call - if call_flag == "dot": + if call_flag == "trivial": + result = _create_result_array( + x1, + x2, + out, + shape=result_shape, + dtype=res_dtype, + usm_type=res_usm_type, + sycl_queue=exec_q, + order=order, + ) + if numpy.prod(result_shape) != 0: + result.fill(0) + elif call_flag == "dot": if out is not None and out.shape != (): result = dpnp_dot(x1, x2, out=None, conjugate=True) else: diff --git a/tests/test_product.py b/tests/test_product.py index e4ac9aac1074..f989ba837343 100644 --- a/tests/test_product.py +++ b/tests/test_product.py @@ -1525,6 +1525,27 @@ def test_order(self, order1, order2, order, shape): assert result.flags.f_contiguous == expected.flags.f_contiguous assert_dtype_allclose(result, expected) + @pytest.mark.parametrize("order", ["C", "F", "K", "A"]) + @pytest.mark.parametrize( + "shape", [(2, 4, 0), (4, 0, 5)], ids=["(2, 4, 0)", "(4, 0, 5)"] + ) + def test_order_trivial(self, order, shape): + # input is both c-contiguous and f-contiguous + a = numpy.ones(shape) + a_dp = dpnp.asarray(a) + + result = dpnp.vecdot(a_dp, a_dp, order=order) + expected = numpy.vecdot(a, a, order=order) + if shape == (2, 4, 0) and order == "A": + # NumPy does not behave correctly for this case, for order="A", + # if input is both c- and f-contiguous, output is c-contiguous + assert result.flags.c_contiguous + assert not result.flags.f_contiguous + else: + assert result.flags.c_contiguous == expected.flags.c_contiguous + assert result.flags.f_contiguous == expected.flags.f_contiguous + assert_dtype_allclose(result, expected) + @pytest.mark.parametrize( "order1, order2, out_order", [ @@ -1538,7 +1559,7 @@ def test_order(self, order1, order2, order, shape): ("F", "F", "C"), ], ) - def test_out(self, order1, order2, out_order): + def test_out_order(self, order1, order2, out_order): a1 = numpy.arange(20).reshape(5, 4, order=order1) a2 = numpy.arange(20).reshape(5, 4, order=order2) @@ -1555,6 +1576,34 @@ def test_out(self, order1, order2, out_order): assert result.flags.f_contiguous == expected.flags.f_contiguous assert_dtype_allclose(result, expected) + @pytest.mark.parametrize("dtype1", get_all_dtypes(no_none=True)) + @pytest.mark.parametrize("dtype2", get_all_dtypes(no_none=True)) + @pytest.mark.parametrize( + "shape_pair", + [ + ((4,), ()), + ((1, 1, 4), (1, 1)), + ((6, 7, 4, 3), (6, 7, 4)), + ((2, 0), (2,)), # zero-size inputs, 1D output + ((3, 0, 4), (3, 0)), # zero-size output + ], + ) + def test_out_dtype(self, dtype1, dtype2, shape_pair): + shape1, shape2 = shape_pair + a = numpy.ones(shape1, 
dtype=dtype1) + b = dpnp.asarray(a) + + out_np = numpy.empty(shape2, dtype=dtype2) + out_dp = dpnp.asarray(out_np) + + if dpnp.can_cast(dtype1, dtype2, casting="same_kind"): + result = dpnp.vecdot(b, b, out=out_dp) + expected = numpy.vecdot(a, a, out=out_np) + assert_dtype_allclose(result, expected) + else: + with pytest.raises(TypeError): + dpnp.vecdot(b, b, out=out_dp) + @pytest.mark.parametrize( "out_shape", [
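
For reference, a minimal usage sketch of the interface introduced by this series (not part of the patch itself; it assumes a dpnp build that already contains these changes). It shows the complex conjugation of the first argument, the `axis` keyword, and the Array API compatible entry point, mirroring the docstring examples added above; exact scalar/0-d reprs may differ slightly between versions.

>>> import dpnp as np
>>> a = np.array([1+2j, 3+4j])
>>> b = np.array([5+6j, 7+8j])
>>> np.vecdot(a, b)                # conj(a) @ b, reduced over the last axis
array(70.-8.j)
>>> x = np.arange(24.).reshape(2, 3, 4)
>>> np.vecdot(x, x, axis=1).shape  # reduce over axis 1 instead of the last axis
(2, 4)
>>> np.linalg.vecdot(a, b)         # Array API compatible version, axis=-1 by default
array(70.-8.j)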