From 308b8cfb415b6cbcf36b7f374cd988993a33bbfd Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 23 Oct 2023 13:57:45 +0200 Subject: [PATCH 01/55] Add a new impl of dpnp.linalg._lu_factor --- dpnp/backend/extensions/lapack/CMakeLists.txt | 1 + dpnp/backend/extensions/lapack/getrf.cpp | 181 ++++++++++++++++++ dpnp/backend/extensions/lapack/getrf.hpp | 51 +++++ dpnp/backend/extensions/lapack/lapack_py.cpp | 8 + .../extensions/lapack/types_matrix.hpp | 26 +++ dpnp/linalg/dpnp_utils_linalg.py | 99 ++++++++++ 6 files changed, 366 insertions(+) create mode 100644 dpnp/backend/extensions/lapack/getrf.cpp create mode 100644 dpnp/backend/extensions/lapack/getrf.hpp diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt index 0c90b4f0ca52..6dbb36b67df9 100644 --- a/dpnp/backend/extensions/lapack/CMakeLists.txt +++ b/dpnp/backend/extensions/lapack/CMakeLists.txt @@ -29,6 +29,7 @@ pybind11_add_module(${python_module_name} MODULE lapack_py.cpp heevd.cpp syevd.cpp + getrf.cpp ) if (WIN32) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp new file mode 100644 index 000000000000..0b988aedb794 --- /dev/null +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -0,0 +1,181 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "getrf.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*getrf_impl_fn_ptr_t)(sycl::queue, + const std::int64_t, + char *, + std::int64_t, + std::int64_t *, + std::vector &, + const std::vector &); + +static getrf_impl_fn_ptr_t getrf_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event getrf_impl(sycl::queue exec_q, + const std::int64_t n, + char *in_a, + std::int64_t lda, + std::int64_t *ipiv, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + + const std::int64_t scratchpad_size = + oneapi::mkl::lapack::getrf_scratchpad_size(exec_q, n, n, lda); + T *scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + + sycl::event getrf_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + getrf_event = oneapi::mkl::lapack::getrf( + exec_q, + n, // The order of the matrix A; (0 ≤ n). + n, // The order of the matrix A; (0 ≤ n). + a, // Pointer to the n-by-n coefficient matrix A. + lda, // The leading dimension of a. + ipiv, + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results + scratchpad_size, depends); + } catch (mkl_lapack::exception const &e) { + error_msg + << "Unexpected MKL exception caught during getrf() call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + info = e.info(); + } catch (sycl::exception const &e) { + error_msg << "Unexpected SYCL exception caught during getrf() call:\n" + << e.what(); + info = -1; + } + + if (info != 0) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(getrf_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + return getrf_event; +} + +sycl::event getrf(sycl::queue q, + const std::int64_t n, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + const std::vector &depends = {}) +{ + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + + getrf_impl_fn_ptr_t getrf_fn = getrf_dispatch_vector[a_array_type_id]; + if (getrf_fn == nullptr) { + throw py::value_error( + "No getrf implementation defined for the provided type " + "of the input matrix."); + } + + char *a_array_data = a_array.get_data(); + const std::int64_t lda = std::max(1UL, n); + + // check valid ipiv + + char *ipiv_array_data = ipiv_array.get_data(); + std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data); + + // std::vector ipiv(n); + // std::int64_t* d_ipiv = sycl::malloc_device(n, q); + + std::vector host_task_events; + sycl::event getrf_ev = + getrf_fn(q, n, a_array_data, lda, d_ipiv, host_task_events, depends); + + return getrf_ev; +} + +template +struct GetrfContigFactory +{ + fnT get() + { + if constexpr 
(types::GetrfTypePairSupportFactory::is_defined) { + return getrf_impl; + } + else { + return nullptr; + } + } +}; + +void init_getrf_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(getrf_dispatch_vector); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/getrf.hpp b/dpnp/backend/extensions/lapack/getrf.hpp new file mode 100644 index 000000000000..0fabf213960e --- /dev/null +++ b/dpnp/backend/extensions/lapack/getrf.hpp @@ -0,0 +1,51 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include +#include + +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +sycl::event getrf(sycl::queue exec_q, + const std::int64_t n, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + const std::vector &depends); + +extern void init_getrf_dispatch_vector(void); +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp index 97b67d59e24e..92f88c560159 100644 --- a/dpnp/backend/extensions/lapack/lapack_py.cpp +++ b/dpnp/backend/extensions/lapack/lapack_py.cpp @@ -30,6 +30,7 @@ #include #include +#include "getrf.hpp" #include "heevd.hpp" #include "syevd.hpp" @@ -40,6 +41,7 @@ namespace py = pybind11; void init_dispatch_vectors(void) { lapack_ext::init_syevd_dispatch_vector(); + lapack_ext::init_getrf_dispatch_vector(); } // populate dispatch tables @@ -66,4 +68,10 @@ PYBIND11_MODULE(_lapack_impl, m) py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"), py::arg("eig_vecs"), py::arg("eig_vals"), py::arg("depends") = py::list()); + + m.def("_getrf", &lapack_ext::getrf, + "Call `getrf` from OneMKL LAPACK library to return " + "the LU factorization of a general n x n matrix", + py::arg("sycl_queue"), py::arg("n"), py::arg("a_array"), + py::arg("ipiv_array"), py::arg("depends") = py::list()); } diff --git a/dpnp/backend/extensions/lapack/types_matrix.hpp b/dpnp/backend/extensions/lapack/types_matrix.hpp index 3cab18d3c63d..b5276e5f1318 100644 --- a/dpnp/backend/extensions/lapack/types_matrix.hpp +++ b/dpnp/backend/extensions/lapack/types_matrix.hpp @@ -80,6 +80,32 @@ struct SyevdTypePairSupportFactory // fall-through dpctl_td_ns::NotDefinedEntry>::is_defined; }; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::getrf + * function. + * + * @tparam T Type of array containing input matrix A, + * as well as the output array for storing the LU factorization. + */ +template +struct GetrfTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; } // namespace types } // namespace lapack } // namespace ext diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 54c01c20248e..98ac1ec8024a 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -164,3 +164,102 @@ def dpnp_eigh(a, UPLO): ht_copy_ev.wait() return w, out_v + + +def _lu_factor(a, dtype=dpnp.float32): + """Compute pivoted LU decomposition. + + Decompose a given batch of square matrices. Inputs and outputs are + transposed. + + Args: + a (dpnp.ndarray): The input matrix with dimension ``(..., N, N)``. + The dimension condition is not checked. + dtype (dpnp.dtype): float32, float64, complex64, or complex128. + + Returns: + tuple: + lu_t (dpnp.ndarray): + ``L`` without its unit diagonal and ``U`` with + dimension ``(..., N, N)``. + piv (dpnp.ndarray): + 1-origin pivot indices with dimension + ``(..., N)``. 
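+
+    Example:
+        Illustrative sketch only; the values below are assumed, not part
+        of this change:
+
+        >>> a = dpnp.array([[4., 3.], [6., 3.]], dtype=dpnp.float32)
+        >>> lu, piv = _lu_factor(a)
+        >>> # ``lu`` packs L (without its unit diagonal) and U together;
+        >>> # ``piv`` holds the 1-origin pivot indices returned by getrf.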
+ + See Also + -------- + :obj:`scipy.linalg.lu_factor` + + """ + + a_usm_arr = dpnp.get_usm_ndarray(a) + + # TODO: use dpnp.linalg.LinAlgError + if a.ndim < 2: + raise ValueError( + f"{a.ndim}-dimensional array given. The input " + "array must be at least two-dimensional" + ) + + n, m = a.shape[-2:] + # TODO: use dpnp.linalg.LinAlgError + if m != n: + raise ValueError("Last 2 dimensions of the input array must be square") + + # orig_shape = a.shape + + a_order = "C" if a.flags.c_contiguous else "F" + + exec_q = a.sycl_queue + if exec_q is None: + raise ValueError( + "Execution placement can not be unambiguously inferred " + "from input arguments." + ) + + # TODO: Use linalg_common_type from #1598 + if dpnp.issubdtype(a.dtype, dpnp.floating): + res_type = ( + a.dtype if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 + ) + elif dpnp.issubdtype(a.dtype, dpnp.complexfloating): + res_type = ( + a.dtype if exec_q.sycl_device.has_aspect_fp64 else dpnp.complex64 + ) + else: + res_type = ( + dpnp.float64 if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 + ) + + a_h = dpnp.empty_like(a, order="C", dtype=res_type) + ipiv_h = dpnp.empty( + n, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue + ) + + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a.sycl_queue + ) + + lapack_ev = li._getrf( + exec_q, n, a_h.get_array(), ipiv_h.get_array(), [a_copy_ev] + ) + + if a_order != "C": + # need to align order of the result of solutions with the + # input array of multiple dependent variables + a_h_f = dpnp.empty_like(a_h, order=a_order) + ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_h.get_array(), + dst=a_h_f.get_array(), + sycl_queue=a.sycl_queue, + depends=[lapack_ev], + ) + ht_copy_out_ev.wait() + out_v = (a_h_f, ipiv_h) + else: + out_v = (a_h, ipiv_h) + + lapack_ev.wait() + a_ht_copy_ev.wait() + + return out_v From 111934144b2d141d5d5836c672890c199d38a2ec Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 25 Oct 2023 18:32:20 +0200 Subject: [PATCH 02/55] Get dev_info_array after calling getrf --- dpnp/backend/extensions/lapack/getrf.cpp | 24 ++++++++++++++++---- dpnp/backend/extensions/lapack/getrf.hpp | 1 + dpnp/backend/extensions/lapack/lapack_py.cpp | 3 ++- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index 0b988aedb794..d48aee69693d 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -51,6 +51,7 @@ typedef sycl::event (*getrf_impl_fn_ptr_t)(sycl::queue, char *, std::int64_t, std::int64_t *, + std::int64_t *, std::vector &, const std::vector &); @@ -62,6 +63,7 @@ static sycl::event getrf_impl(sycl::queue exec_q, char *in_a, std::int64_t lda, std::int64_t *ipiv, + std::int64_t *dev_info, std::vector &host_task_events, const std::vector &depends) { @@ -106,9 +108,19 @@ static sycl::event getrf_impl(sycl::queue exec_q, if (scratchpad != nullptr) { sycl::free(scratchpad, exec_q); } - throw std::runtime_error(error_msg.str()); + + if (info < 0) { + throw std::runtime_error(error_msg.str()); + } } + sycl::event write_info_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(getrf_event); + cgh.single_task([=]() { dev_info[0] = info; }); + }); + + host_task_events.push_back(write_info_event); + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(getrf_event); auto ctx = 
exec_q.get_context(); @@ -122,6 +134,7 @@ sycl::event getrf(sycl::queue q, const std::int64_t n, dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, + dpctl::tensor::usm_ndarray dev_info_array, const std::vector &depends = {}) { @@ -144,12 +157,13 @@ sycl::event getrf(sycl::queue q, char *ipiv_array_data = ipiv_array.get_data(); std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data); - // std::vector ipiv(n); - // std::int64_t* d_ipiv = sycl::malloc_device(n, q); + char *dev_info_array_data = dev_info_array.get_data(); + std::int64_t *d_dev_info = + reinterpret_cast(dev_info_array_data); std::vector host_task_events; - sycl::event getrf_ev = - getrf_fn(q, n, a_array_data, lda, d_ipiv, host_task_events, depends); + sycl::event getrf_ev = getrf_fn(q, n, a_array_data, lda, d_ipiv, d_dev_info, + host_task_events, depends); return getrf_ev; } diff --git a/dpnp/backend/extensions/lapack/getrf.hpp b/dpnp/backend/extensions/lapack/getrf.hpp index 0fabf213960e..583f4bf2d1e8 100644 --- a/dpnp/backend/extensions/lapack/getrf.hpp +++ b/dpnp/backend/extensions/lapack/getrf.hpp @@ -42,6 +42,7 @@ sycl::event getrf(sycl::queue exec_q, const std::int64_t n, dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, + dpctl::tensor::usm_ndarray dev_info_array, const std::vector &depends); extern void init_getrf_dispatch_vector(void); diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp index 92f88c560159..38320d205aa3 100644 --- a/dpnp/backend/extensions/lapack/lapack_py.cpp +++ b/dpnp/backend/extensions/lapack/lapack_py.cpp @@ -73,5 +73,6 @@ PYBIND11_MODULE(_lapack_impl, m) "Call `getrf` from OneMKL LAPACK library to return " "the LU factorization of a general n x n matrix", py::arg("sycl_queue"), py::arg("n"), py::arg("a_array"), - py::arg("ipiv_array"), py::arg("depends") = py::list()); + py::arg("ipiv_array"), py::arg("dev_info_array"), + py::arg("depends") = py::list()); } From 71c2e96a47f6fabb529be229f8fbd8cfe04bbe3a Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 25 Oct 2023 18:38:38 +0200 Subject: [PATCH 03/55] Add an extra dev_info return to _lu_factor --- dpnp/linalg/dpnp_utils_linalg.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 98ac1ec8024a..915000785717 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -32,7 +32,7 @@ import dpnp import dpnp.backend.extensions.lapack._lapack_impl as li -__all__ = ["dpnp_eigh"] +__all__ = ["dpnp_eigh", "_lu_factor"] _jobz = {"N": 0, "V": 1} _upper_lower = {"U": 0, "L": 1} @@ -231,20 +231,28 @@ def _lu_factor(a, dtype=dpnp.float32): dpnp.float64 if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 ) - a_h = dpnp.empty_like(a, order="C", dtype=res_type) + a_h = dpnp.empty_like(a, order="F", dtype=res_type) ipiv_h = dpnp.empty( n, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue ) + dev_info_h = dpnp.empty( + 1, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue + ) a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a.sycl_queue ) lapack_ev = li._getrf( - exec_q, n, a_h.get_array(), ipiv_h.get_array(), [a_copy_ev] + exec_q, + n, + a_h.get_array(), + ipiv_h.get_array(), + dev_info_h.get_array(), + [a_copy_ev], ) - if a_order != "C": + if a_order != "F": # need to align order of the result of solutions with the # 
input array of multiple dependent variables a_h_f = dpnp.empty_like(a_h, order=a_order) @@ -255,9 +263,9 @@ def _lu_factor(a, dtype=dpnp.float32): depends=[lapack_ev], ) ht_copy_out_ev.wait() - out_v = (a_h_f, ipiv_h) + out_v = (a_h_f, ipiv_h, dev_info_h) else: - out_v = (a_h, ipiv_h) + out_v = (a_h, ipiv_h, dev_info_h) lapack_ev.wait() a_ht_copy_ev.wait() From b24c8c590e1dd82cb8f93d5de82b475d61a6a99c Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 26 Oct 2023 11:05:38 +0200 Subject: [PATCH 04/55] qwe --- dpnp/linalg/dpnp_iface_linalg.py | 130 ++++++++++++++++-- qwe.py | 39 ++++++ tests/test_linalg.py | 14 +- .../cupy/linalg_tests/test_norms.py | 8 ++ 4 files changed, 174 insertions(+), 17 deletions(-) create mode 100644 qwe.py create mode 100644 tests/third_party/cupy/linalg_tests/test_norms.py diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index b41b96c70525..5815b1de4e1c 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -47,7 +47,7 @@ from dpnp.dpnp_utils import * from dpnp.linalg.dpnp_algo_linalg import * -from .dpnp_utils_linalg import dpnp_eigh +from .dpnp_utils_linalg import dpnp_eigh, _lu_factor __all__ = [ "cholesky", @@ -157,17 +157,19 @@ def det(input): Determinant of `input`. """ - x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) - if x1_desc: - if x1_desc.ndim < 2: - pass - elif x1_desc.shape[-1] == x1_desc.shape[-2]: - result_obj = dpnp_det(x1_desc).get_pyobj() - result = dpnp.convert_single_elem_array_to_scalar(result_obj) + # x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) + # if x1_desc: + # if x1_desc.ndim < 2: + # pass + # elif x1_desc.shape[-1] == x1_desc.shape[-2]: + # result_obj = dpnp_det(x1_desc).get_pyobj() + # result = dpnp.convert_single_elem_array_to_scalar(result_obj) - return result + # return result + + # return call_origin(numpy.linalg.det, input) - return call_origin(numpy.linalg.det, input) + return _lu_factor(input) def eig(x1): @@ -577,3 +579,111 @@ def svd(x1, full_matrices=True, compute_uv=True, hermitian=False): return call_origin( numpy.linalg.svd, x1, full_matrices, compute_uv, hermitian ) + +""" +def _lu_factor(a, dtype=dpnp.float32): + a_usm_arr = dpnp.get_usm_ndarray(a) + shape = a.shape[:-2] + + # TODO: use dpnp.linalg.LinAlgError + if a.ndim < 2: + raise ValueError( + f"{a.ndim}-dimensional array given. The input " + "array must be at least two-dimensional" + ) + + n, m = a.shape[-2:] + # TODO: use dpnp.linalg.LinAlgError + if m != n: + raise ValueError("Last 2 dimensions of the input array must be square") + + # orig_shape = a.shape + + a_order = "C" if a.flags.c_contiguous else "F" + + exec_q = a.sycl_queue + if exec_q is None: + raise ValueError( + "Execution placement can not be unambiguously inferred " + "from input arguments." 
+ ) + + # TODO: Use linalg_common_type from #1598 + if dpnp.issubdtype(a.dtype, dpnp.floating): + res_type = ( + a.dtype if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 + ) + elif dpnp.issubdtype(a.dtype, dpnp.complexfloating): + res_type = ( + a.dtype if exec_q.sycl_device.has_aspect_fp64 else dpnp.complex64 + ) + else: + res_type = ( + dpnp.float64 if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 + ) + + logdet_dtype = numpy.dtype(res_type) + dtype = numpy.dtype(res_type) #a.dtype + + a_h = dpnp.empty_like(a, order="F", dtype=res_type) + ipiv_h = dpnp.empty( + n, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue + ) + dev_info_h = dpnp.empty( + 1, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue + ) + + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a.sycl_queue + ) + + lapack_ev = li._getrf( + exec_q, n, a_h.get_array(), ipiv_h.get_array(),dev_info_h.get_array(), [a_copy_ev] + ) + + if a_order != "F": + # need to align order of the result of solutions with the + # input array of multiple dependent variables + a_h_f = dpnp.empty_like(a_h, order=a_order) + ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_h.get_array(), + dst=a_h_f.get_array(), + sycl_queue=a.sycl_queue, + depends=[lapack_ev], + ) + ht_copy_out_ev.wait() + out_v = (a_h_f, ipiv_h, dev_info_h) + else: + out_v = (a_h, ipiv_h, dev_info_h) + + lapack_ev.wait() + a_ht_copy_ev.wait() + + lu_np = dpnp.asnumpy(out_v[0]) + ipiv_np = dpnp.asnumpy(out_v[1]) + dev_info_np = dpnp.asnumpy(out_v[2]) + + + diag = numpy.diagonal(lu_np, axis1=-2, axis2=-1) + logdet = numpy.log(numpy.abs(diag)).sum(axis=-1) + + non_zero = numpy.count_nonzero(ipiv_np != numpy.arange(1, n + 1), axis=-1) + if dtype.kind == "f": + non_zero += numpy.count_nonzero(diag < 0, axis=-1) + + sign = (non_zero % 2) * -2 + 1 + if dtype.kind == "c": + sign = sign * numpy.prod(diag / numpy.abs(diag), axis=-1) + + sign = sign.astype(dtype) + logdet = logdet.astype(logdet_dtype, copy=False) + singular = dev_info_np > 0 + res_sign = numpy.where(singular, dtype.type(0), sign).reshape(shape) + res_logdet = numpy.where(singular, logdet_dtype.type('-inf'), logdet).reshape(shape) + + res = res_sign * numpy.exp(res_logdet) + + return res + # return out_v + +""" diff --git a/qwe.py b/qwe.py new file mode 100644 index 000000000000..ab03b4a3b5ad --- /dev/null +++ b/qwe.py @@ -0,0 +1,39 @@ +# import dpnp +# from dpnp.linalg. 
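+# Scratch comparison script: the commented-out block below checks
+# dpnp.linalg.svd against numpy.linalg.svd, and the active block at the
+# bottom compares _lu_factor with scipy.linalg.lu_factor on a 2x2 matrix.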
+# import numpy + +# a = numpy.arange(1,49,dtype='f4').reshape(2,2,3,4) +# a = numpy.arange(1,5,dtype='f4').reshape(2,2) +# a_dp = dpnp.array(a) +# print(numpy.linalg.svd(a,compute_uv=False)) +# print("===================") +# print(dpnp.linalg.svd(a_dp,compute_uv=False)) +# print("===================") +# a_dp = dpnp.array(a,device='cpu') +# print(dpnp.linalg.svd(a_dp,full_matrices=False)) + +# print(numpy.linalg.svd(a,full_matrices=False)) +# res_np = numpy.linalg.svd(a) +# print(res_np[0].shape) +# print(res_np[1].shape) +# print(res_np[2].shape) +# print("===================") +# print(dpnp.linalg.svd(a_dp, full_matrices=False)) +# res_dp = dpnp.linalg.svd(a_dp) +# print(res_dp[0].shape) +# print(res_dp[1].shape) +# print(res_dp[2].shape) +# print(a_dp) + + +from dpnp.linalg.dpnp_utils_linalg import _lu_factor +import dpnp +import numpy +import scipy + +a = numpy.array([[1, 2], [3, 4]],dtype="f4",order='C') +a_dp = dpnp.array(a) + +print(scipy.linalg.lu_factor(a)) +print("===========================") +print(_lu_factor(a_dp)) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 7ce774d0abd6..89361795a6e2 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -106,18 +106,18 @@ def test_cond(arr, p): [[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]], - [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], - [ - [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], - [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], - ], + # [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], + # [ + # [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], + # [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], + # ], ], ids=[ "[[0, 0], [0, 0]]", "[[1, 2], [1, 2]]", "[[1, 2], [3, 4]]", - "[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]", - "[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]", + # "[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]", + # "[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]", ], ) def test_det(array): diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py new file mode 100644 index 000000000000..d4e95a5641f0 --- /dev/null +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -0,0 +1,8 @@ +import unittest + +import numpy +import pytest + +import dpnp as cupy +from tests.helper import has_support_aspect64 +from tests.third_party.cupy import testing From 500df36eb3b8de741500698b7b9ef00f81db1cb8 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 31 Oct 2023 11:29:05 +0100 Subject: [PATCH 05/55] Add a logic for a.ndim > 2 in _lu_factor --- dpnp/linalg/dpnp_utils_linalg.py | 134 ++++++++++++++++++++----------- 1 file changed, 85 insertions(+), 49 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 915000785717..99e967a14aab 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -166,7 +166,7 @@ def dpnp_eigh(a, UPLO): return w, out_v -def _lu_factor(a, dtype=dpnp.float32): +def _lu_factor(a, res_type): """Compute pivoted LU decomposition. Decompose a given batch of square matrices. Inputs and outputs are @@ -192,8 +192,6 @@ def _lu_factor(a, dtype=dpnp.float32): """ - a_usm_arr = dpnp.get_usm_ndarray(a) - # TODO: use dpnp.linalg.LinAlgError if a.ndim < 2: raise ValueError( @@ -217,57 +215,95 @@ def _lu_factor(a, dtype=dpnp.float32): "from input arguments." 
) - # TODO: Use linalg_common_type from #1598 - if dpnp.issubdtype(a.dtype, dpnp.floating): - res_type = ( - a.dtype if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 + if a.ndim > 2: + orig_shape = a.shape + a = a.reshape(-1, n, n) + batch_size = a.shape[0] + a_usm_arr = dpnp.get_usm_ndarray(a) + + a_vecs = [None] * batch_size + ipiv_h = dpnp.empty( + (batch_size, n), + dtype=dpnp.int64, + usm_type=a.usm_type, + sycl_queue=a.sycl_queue, ) - elif dpnp.issubdtype(a.dtype, dpnp.complexfloating): - res_type = ( - a.dtype if exec_q.sycl_device.has_aspect_fp64 else dpnp.complex64 + dev_info_h = dpnp.empty( + (batch_size,), + dtype=dpnp.int64, + usm_type=a.usm_type, + sycl_queue=a.sycl_queue, ) + a_ht_copy_ev = [None] * batch_size + ht_lapack_ev = [None] * batch_size + + for i in range(batch_size): + a_vecs[i] = dpnp.empty_like(a[i], order="F", dtype=res_type) + a_ht_copy_ev[i], a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr[i], + dst=a_vecs[i].get_array(), + sycl_queue=a.sycl_queue, + ) + + ht_lapack_ev[i] = li._getrf( + exec_q, + n, + a_vecs[i].get_array(), + ipiv_h[i].get_array(), + dev_info_h[i].get_array(), + [a_copy_ev], + ) + + for i in range(batch_size): + ht_lapack_ev[i].wait() + a_ht_copy_ev[i].wait() + + out_v = dpnp.array(a_vecs, order=a_order).reshape(orig_shape) + out_ipiv = ipiv_h.reshape(orig_shape[:-1]) + out_dev_info = dev_info_h.reshape(orig_shape[:-2]) + + return (out_v, out_ipiv, out_dev_info) + else: - res_type = ( - dpnp.float64 if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 + a_usm_arr = dpnp.get_usm_ndarray(a) + + a_h = dpnp.empty_like(a, order="F", dtype=res_type) + ipiv_h = dpnp.empty( + n, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue + ) + dev_info_h = dpnp.empty( + 1, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue ) - a_h = dpnp.empty_like(a, order="F", dtype=res_type) - ipiv_h = dpnp.empty( - n, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue - ) - dev_info_h = dpnp.empty( - 1, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue - ) - - a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a.sycl_queue - ) - - lapack_ev = li._getrf( - exec_q, - n, - a_h.get_array(), - ipiv_h.get_array(), - dev_info_h.get_array(), - [a_copy_ev], - ) - - if a_order != "F": - # need to align order of the result of solutions with the - # input array of multiple dependent variables - a_h_f = dpnp.empty_like(a_h, order=a_order) - ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_h.get_array(), - dst=a_h_f.get_array(), - sycl_queue=a.sycl_queue, - depends=[lapack_ev], + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a.sycl_queue ) - ht_copy_out_ev.wait() - out_v = (a_h_f, ipiv_h, dev_info_h) - else: - out_v = (a_h, ipiv_h, dev_info_h) - lapack_ev.wait() - a_ht_copy_ev.wait() + lapack_ev = li._getrf( + exec_q, + n, + a_h.get_array(), + ipiv_h.get_array(), + dev_info_h.get_array(), + [a_copy_ev], + ) + + if a_order != "F": + # need to align order of the result of solutions with the + # input array of multiple dependent variables + a_h_f = dpnp.empty_like(a_h, order=a_order) + ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_h.get_array(), + dst=a_h_f.get_array(), + sycl_queue=a.sycl_queue, + depends=[lapack_ev], + ) + ht_copy_out_ev.wait() + out_v = (a_h_f, ipiv_h, dev_info_h) + else: + out_v = (a_h, ipiv_h, dev_info_h) + + 
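+        # Block here so the factorization and the host-side copy of the
+        # input have finished before the result arrays are handed back.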
lapack_ev.wait() + a_ht_copy_ev.wait() - return out_v + return out_v From b35e28268269df4c50a0f4c2bac0a22c484d9fb4 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 31 Oct 2023 11:31:23 +0100 Subject: [PATCH 06/55] Add an implementation of dpnp.linalg.slogdet --- dpnp/linalg/dpnp_iface_linalg.py | 161 +++++++++++++++++-------------- 1 file changed, 86 insertions(+), 75 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 5815b1de4e1c..18d8f359ae42 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -47,7 +47,7 @@ from dpnp.dpnp_utils import * from dpnp.linalg.dpnp_algo_linalg import * -from .dpnp_utils_linalg import dpnp_eigh, _lu_factor +from .dpnp_utils_linalg import _lu_factor, dpnp_eigh __all__ = [ "cholesky", @@ -63,6 +63,7 @@ "norm", "qr", "svd", + "slogdet", ] @@ -580,10 +581,39 @@ def svd(x1, full_matrices=True, compute_uv=True, hermitian=False): numpy.linalg.svd, x1, full_matrices, compute_uv, hermitian ) -""" -def _lu_factor(a, dtype=dpnp.float32): - a_usm_arr = dpnp.get_usm_ndarray(a) - shape = a.shape[:-2] + +def slogdet(a): + """Returns sign and logarithm of the determinant of an array. + + It calculates the natural logarithm of the determinant of a given value. + + Args: + a (dpnp.ndarray): The input matrix with dimension ``(..., N, N)``. + + Returns: + tuple of :class:`~dpnp.ndarray`: + It returns a tuple ``(sign, logdet)``. ``sign`` represents each + sign of the determinant as a real number ``0``, ``1`` or ``-1``. + 'logdet' represents the natural logarithm of the absolute of the + determinant. + If the determinant is zero, ``sign`` will be ``0`` and ``logdet`` + will be ``-inf``. + The shapes of both ``sign`` and ``logdet`` are equal to + ``a.shape[:-2]``. + + .. warning:: + This function calls one or more cuSOLVER routine(s) which may yield + invalid results if input conditions are not met. + To detect these invalid results, you can set the `linalg` + configuration to a value that is not `ignore` in + :func:`cupyx.errstate` or :func:`cupyx.seterr`. + + .. warning:: + To produce the same results as :func:`numpy.linalg.slogdet` for + singular inputs, set the `linalg` configuration to `raise`. + + .. seealso:: :func:`numpy.linalg.slogdet` + """ # TODO: use dpnp.linalg.LinAlgError if a.ndim < 2: @@ -597,17 +627,8 @@ def _lu_factor(a, dtype=dpnp.float32): if m != n: raise ValueError("Last 2 dimensions of the input array must be square") - # orig_shape = a.shape - - a_order = "C" if a.flags.c_contiguous else "F" - exec_q = a.sycl_queue - if exec_q is None: - raise ValueError( - "Execution placement can not be unambiguously inferred " - "from input arguments." 
- ) - + # dtype, sign_dtype = _util.linalg_common_type(a) # TODO: Use linalg_common_type from #1598 if dpnp.issubdtype(a.dtype, dpnp.floating): res_type = ( @@ -622,68 +643,58 @@ def _lu_factor(a, dtype=dpnp.float32): dpnp.float64 if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 ) - logdet_dtype = numpy.dtype(res_type) - dtype = numpy.dtype(res_type) #a.dtype - - a_h = dpnp.empty_like(a, order="F", dtype=res_type) - ipiv_h = dpnp.empty( - n, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue - ) - dev_info_h = dpnp.empty( - 1, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue - ) - - a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a.sycl_queue - ) - - lapack_ev = li._getrf( - exec_q, n, a_h.get_array(), ipiv_h.get_array(),dev_info_h.get_array(), [a_copy_ev] + res_type = dpnp.dtype(res_type) + logdet_dtype = dpnp.dtype(res_type.char.lower()) + + a_shape = a.shape + shape = a_shape[:-2] + n = a_shape[-2] + + if a.size == 0: + # empty batch (result is empty, too) or empty matrices det([[]]) == 1 + sign = dpnp.ones(shape, res_type) + logdet = dpnp.zeros(shape, logdet_dtype) + return sign, logdet + + lu, ipiv, dev_info = _lu_factor(a, res_type) + + # dev_info < 0 means illegal value (in dimensions, strides, and etc.) that + # should never happen even if the matrix contains nan or inf. + # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for + # debugging purposes. + + # Transposing 'lu' to swap the last two axes for compatibility + # with 'dpnp.diagonal' as it does not support 'axis1' and 'axis2' arguments. + # TODO: Replace with 'dpnp.diagonal(lu, axis1=-2, axis2=-1)' when supported. + lu_transposed = lu.transpose(-2, -1, *range(lu.ndim - 2)) + diag = dpnp.diagonal(lu_transposed) + + logdet = dpnp.log(dpnp.abs(diag)).sum(axis=-1) + + # Transposing 'ipiv' and reshaping 'arange_values' for element-wise comparison + # as 'dpnp.count_nonzero' does not support the 'axis' parameter. + ipiv_transposed = ipiv.transpose(-1, *range(ipiv.ndim - 1)) + arange_shape = [1] * a.ndim + arange_shape[-2] = n + arange_values = dpnp.arange(1, n + 1, sycl_queue=exec_q).reshape( + arange_shape ) - if a_order != "F": - # need to align order of the result of solutions with the - # input array of multiple dependent variables - a_h_f = dpnp.empty_like(a_h, order=a_order) - ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_h.get_array(), - dst=a_h_f.get_array(), - sycl_queue=a.sycl_queue, - depends=[lapack_ev], - ) - ht_copy_out_ev.wait() - out_v = (a_h_f, ipiv_h, dev_info_h) - else: - out_v = (a_h, ipiv_h, dev_info_h) - - lapack_ev.wait() - a_ht_copy_ev.wait() - - lu_np = dpnp.asnumpy(out_v[0]) - ipiv_np = dpnp.asnumpy(out_v[1]) - dev_info_np = dpnp.asnumpy(out_v[2]) + # ipiv is 1-origin + # TODO: Replace with 'dpnp.count_nonzero(ipiv != dpnp.arange(1, n + 1), axis=-1)' + # when supported. 
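+    # Every pivot entry that differs from its (1-based) row index records
+    # one row interchange, and each interchange flips the sign of det(P);
+    # for real types a negative diagonal entry of U flips the overall
+    # sign once more, so det(A) carries a factor of (-1) ** non_zero.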
+ non_zero = dpnp.count_nonzero(ipiv_transposed != arange_values) + if res_type.kind == "f": + non_zero += dpnp.count_nonzero(diag < 0) + sign = -(1 ** (non_zero % 2)) + if res_type.kind == "c": + sign = sign * dpnp.prod(diag / dpnp.abs(diag), axis=-1) - diag = numpy.diagonal(lu_np, axis1=-2, axis2=-1) - logdet = numpy.log(numpy.abs(diag)).sum(axis=-1) - - non_zero = numpy.count_nonzero(ipiv_np != numpy.arange(1, n + 1), axis=-1) - if dtype.kind == "f": - non_zero += numpy.count_nonzero(diag < 0, axis=-1) - - sign = (non_zero % 2) * -2 + 1 - if dtype.kind == "c": - sign = sign * numpy.prod(diag / numpy.abs(diag), axis=-1) - - sign = sign.astype(dtype) + sign = sign.astype(res_type) logdet = logdet.astype(logdet_dtype, copy=False) - singular = dev_info_np > 0 - res_sign = numpy.where(singular, dtype.type(0), sign).reshape(shape) - res_logdet = numpy.where(singular, logdet_dtype.type('-inf'), logdet).reshape(shape) - - res = res_sign * numpy.exp(res_logdet) - - return res - # return out_v - -""" + singular = dev_info > 0 + return ( + dpnp.where(singular, res_type.type(0), sign).reshape(shape), + dpnp.where(singular, logdet_dtype.type("-inf"), logdet).reshape(shape), + ) From 301fc2af0ffb17693dcc93bd0e110f947544b4e0 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 31 Oct 2023 11:35:35 +0100 Subject: [PATCH 07/55] Add a new test_norms.py file in cupy tests --- .../cupy/linalg_tests/test_norms.py | 53 ++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index d4e95a5641f0..71c8922a23ea 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -4,5 +4,56 @@ import pytest import dpnp as cupy -from tests.helper import has_support_aspect64 from tests.third_party.cupy import testing + + +class TestSlogdet(unittest.TestCase): + @testing.for_dtypes("fd") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_slogdet(self, xp, dtype): + a = testing.shaped_arange((2, 2), xp, dtype) + 1 + sign, logdet = xp.linalg.slogdet(a) + return sign, logdet + + @testing.for_dtypes("fd") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_slogdet_3(self, xp, dtype): + a = testing.shaped_arange((2, 2, 2), xp, dtype) + 1 + sign, logdet = xp.linalg.slogdet(a) + return sign, logdet + + @testing.for_dtypes("fd") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_slogdet_4(self, xp, dtype): + a = testing.shaped_arange((2, 2, 2, 2), xp, dtype) + 1 + sign, logdet = xp.linalg.slogdet(a) + return sign, logdet + + @testing.for_dtypes("fd") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_slogdet_singular(self, xp, dtype): + a = xp.zeros((3, 3), dtype=dtype) + sign, logdet = xp.linalg.slogdet(a) + return sign, logdet + + # @testing.for_dtypes("fd") + # @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + # def test_slogdet_singular_errstate(self, xp, dtype): + # a = xp.zeros((3, 3), dtype) + # with cupyx.errstate(linalg="raise"): + # # `cupy.linalg.slogdet` internally catches `dev_info < 0` from + # # cuSOLVER, which should not affect `dev_info > 0` cases. 
+ # sign, logdet = xp.linalg.slogdet(a) + # return sign, logdet + + @testing.for_dtypes("fd") + def test_slogdet_one_dim(self, dtype): + for xp in (numpy, cupy): + a = testing.shaped_arange((2,), xp, dtype) + if xp is numpy: + with pytest.raises(numpy.linalg.LinAlgError): + xp.linalg.slogdet(a) + else: + # Replace with dpnp.linalg.LinAlgError + with pytest.raises(ValueError): + xp.linalg.slogdet(a) From 9aba0146ed7e5ead9fa8587400e6fb80825eb0cf Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 31 Oct 2023 11:37:43 +0100 Subject: [PATCH 08/55] Expand test scope in public CI --- .github/workflows/conda-package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index a909d860b9a5..2c9598899b98 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -28,6 +28,7 @@ env: test_umath.py test_usm_type.py third_party/cupy/core_tests + third_party/cupy/linalg_tests/test_norms.py third_party/cupy/linalg_tests/test_product.py third_party/cupy/logic_tests/test_comparison.py third_party/cupy/logic_tests/test_truth.py From a8789e46e7985b149e76a67a99712b19c40ed38b Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 22 Nov 2023 20:32:54 +0100 Subject: [PATCH 09/55] A small update _lu_factor func --- dpnp/linalg/dpnp_utils_linalg.py | 35 ++++++++++++++------------------ 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 99e967a14aab..7e74884dda18 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -175,7 +175,7 @@ def _lu_factor(a, res_type): Args: a (dpnp.ndarray): The input matrix with dimension ``(..., N, N)``. The dimension condition is not checked. - dtype (dpnp.dtype): float32, float64, complex64, or complex128. + res_type (dpnp.dtype): float32, float64, complex64 or complex128. Returns: tuple: @@ -204,16 +204,11 @@ def _lu_factor(a, res_type): if m != n: raise ValueError("Last 2 dimensions of the input array must be square") - # orig_shape = a.shape - a_order = "C" if a.flags.c_contiguous else "F" + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type - exec_q = a.sycl_queue - if exec_q is None: - raise ValueError( - "Execution placement can not be unambiguously inferred " - "from input arguments." 
- ) + # TODO: use getrf_batch if a.ndim > 2: orig_shape = a.shape @@ -225,14 +220,14 @@ def _lu_factor(a, res_type): ipiv_h = dpnp.empty( (batch_size, n), dtype=dpnp.int64, - usm_type=a.usm_type, - sycl_queue=a.sycl_queue, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, ) dev_info_h = dpnp.empty( (batch_size,), dtype=dpnp.int64, - usm_type=a.usm_type, - sycl_queue=a.sycl_queue, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, ) a_ht_copy_ev = [None] * batch_size ht_lapack_ev = [None] * batch_size @@ -242,11 +237,11 @@ def _lu_factor(a, res_type): a_ht_copy_ev[i], a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=a_usm_arr[i], dst=a_vecs[i].get_array(), - sycl_queue=a.sycl_queue, + sycl_queue=a_sycl_queue, ) ht_lapack_ev[i] = li._getrf( - exec_q, + a_sycl_queue, n, a_vecs[i].get_array(), ipiv_h[i].get_array(), @@ -269,18 +264,18 @@ def _lu_factor(a, res_type): a_h = dpnp.empty_like(a, order="F", dtype=res_type) ipiv_h = dpnp.empty( - n, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue + n, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) dev_info_h = dpnp.empty( - 1, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue + 1, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a.sycl_queue + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue ) lapack_ev = li._getrf( - exec_q, + a_sycl_queue, n, a_h.get_array(), ipiv_h.get_array(), @@ -295,7 +290,7 @@ def _lu_factor(a, res_type): ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( src=a_h.get_array(), dst=a_h_f.get_array(), - sycl_queue=a.sycl_queue, + sycl_queue=a_sycl_queue, depends=[lapack_ev], ) ht_copy_out_ev.wait() From 59642f6f5b12262282f1f5bb979f218328d6bf2c Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 23 Nov 2023 13:14:26 +0100 Subject: [PATCH 10/55] Remove w/a for dpnp.count_nonzero in slogdet --- dpnp/linalg/dpnp_iface_linalg.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 18d8f359ae42..25dcb3b03551 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -671,21 +671,10 @@ def slogdet(a): logdet = dpnp.log(dpnp.abs(diag)).sum(axis=-1) - # Transposing 'ipiv' and reshaping 'arange_values' for element-wise comparison - # as 'dpnp.count_nonzero' does not support the 'axis' parameter. - ipiv_transposed = ipiv.transpose(-1, *range(ipiv.ndim - 1)) - arange_shape = [1] * a.ndim - arange_shape[-2] = n - arange_values = dpnp.arange(1, n + 1, sycl_queue=exec_q).reshape( - arange_shape - ) - # ipiv is 1-origin - # TODO: Replace with 'dpnp.count_nonzero(ipiv != dpnp.arange(1, n + 1), axis=-1)' - # when supported. 
- non_zero = dpnp.count_nonzero(ipiv_transposed != arange_values) + non_zero = dpnp.count_nonzero(ipiv != dpnp.arange(1, n + 1), axis=-1) if res_type.kind == "f": - non_zero += dpnp.count_nonzero(diag < 0) + non_zero += dpnp.count_nonzero(diag < 0, axis=-1) sign = -(1 ** (non_zero % 2)) if res_type.kind == "c": From db45555a5a6c6cafefc04777f9b77a8ed034601f Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 23 Nov 2023 15:36:45 +0100 Subject: [PATCH 11/55] getrf returns pair of events and uses dpctl.utils.keep_args_alive --- dpnp/backend/extensions/lapack/getrf.cpp | 18 +++++++++++------- dpnp/backend/extensions/lapack/getrf.hpp | 13 +++++++------ dpnp/linalg/dpnp_utils_linalg.py | 6 +++--- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index d48aee69693d..b7d830f63c24 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -130,12 +130,13 @@ static sycl::event getrf_impl(sycl::queue exec_q, return getrf_event; } -sycl::event getrf(sycl::queue q, - const std::int64_t n, - dpctl::tensor::usm_ndarray a_array, - dpctl::tensor::usm_ndarray ipiv_array, - dpctl::tensor::usm_ndarray dev_info_array, - const std::vector &depends = {}) +std::pair + getrf(sycl::queue q, + const std::int64_t n, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + dpctl::tensor::usm_ndarray dev_info_array, + const std::vector &depends) { auto array_types = dpctl_td_ns::usm_ndarray_types(); @@ -165,7 +166,10 @@ sycl::event getrf(sycl::queue q, sycl::event getrf_ev = getrf_fn(q, n, a_array_data, lda, d_ipiv, d_dev_info, host_task_events, depends); - return getrf_ev; + sycl::event args_ev = dpctl::utils::keep_args_alive( + q, {a_array, ipiv_array, dev_info_array}, host_task_events); + + return std::make_pair(args_ev, getrf_ev); } template diff --git a/dpnp/backend/extensions/lapack/getrf.hpp b/dpnp/backend/extensions/lapack/getrf.hpp index 583f4bf2d1e8..2b5d7c18ce47 100644 --- a/dpnp/backend/extensions/lapack/getrf.hpp +++ b/dpnp/backend/extensions/lapack/getrf.hpp @@ -38,12 +38,13 @@ namespace ext { namespace lapack { -sycl::event getrf(sycl::queue exec_q, - const std::int64_t n, - dpctl::tensor::usm_ndarray a_array, - dpctl::tensor::usm_ndarray ipiv_array, - dpctl::tensor::usm_ndarray dev_info_array, - const std::vector &depends); +extern std::pair + getrf(sycl::queue exec_q, + const std::int64_t n, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + dpctl::tensor::usm_ndarray dev_info_array, + const std::vector &depends = {}); extern void init_getrf_dispatch_vector(void); } // namespace lapack diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 7e74884dda18..435263701086 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -240,7 +240,7 @@ def _lu_factor(a, res_type): sycl_queue=a_sycl_queue, ) - ht_lapack_ev[i] = li._getrf( + ht_lapack_ev[i], _ = li._getrf( a_sycl_queue, n, a_vecs[i].get_array(), @@ -274,7 +274,7 @@ def _lu_factor(a, res_type): src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue ) - lapack_ev = li._getrf( + ht_lapack_ev, lapack_ev = li._getrf( a_sycl_queue, n, a_h.get_array(), @@ -298,7 +298,7 @@ def _lu_factor(a, res_type): else: out_v = (a_h, ipiv_h, dev_info_h) - lapack_ev.wait() + ht_lapack_ev.wait() a_ht_copy_ev.wait() return out_v From 0350d86e4bbc28a779e2f98146c9cde2e918001f Mon Sep 17 00:00:00 2001 
From: Vladislav Perevezentsev Date: Mon, 27 Nov 2023 12:58:23 +0100 Subject: [PATCH 12/55] Update dpnp.linalg.det using slogdet --- dpnp/linalg/dpnp_iface_linalg.py | 57 ++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 25dcb3b03551..27bd09878926 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -143,34 +143,57 @@ def cond(input, p=None): return call_origin(numpy.linalg.cond, input, p) -def det(input): +def det(a): """ Compute the determinant of an array. + For full documentation refer to :obj:`numpy.linalg.det`. + Parameters ---------- - input : (..., M, M) array_like + a : (..., M, M) dpnp.ndarray Input array to compute determinants for. Returns ------- - det : (...) array_like - Determinant of `input`. - """ + det : (...) dpnp.ndarray + Determinant of `a`. + + Limitations + ----------- + Parameter `a` is supported as :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. + Input array data types are limited by supported DPNP :ref:`Data types`. + + See Also + -------- + :obj:`dpnp.linalg.slogdet` : Returns sign and logarithm of the determinant of an array. - # x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) - # if x1_desc: - # if x1_desc.ndim < 2: - # pass - # elif x1_desc.shape[-1] == x1_desc.shape[-2]: - # result_obj = dpnp_det(x1_desc).get_pyobj() - # result = dpnp.convert_single_elem_array_to_scalar(result_obj) + Examples + -------- + The determinant of a 2-D array [[a, b], [c, d]] is ad - bc: + + >>> import dpnp as dp + >>> a = dp.array([[1, 2], [3, 4]]) + >>> dp.linalg.det(a) + array(-2.) + + Computing determinants for a stack of matrices: - # return result + >>> a = dp.array([ [[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]] ]) + >>> a.shape + (3, 2, 2) + >>> dp.linalg.det(a) + array([-2., -3., -8.]) - # return call_origin(numpy.linalg.det, input) + """ + + if not dpnp.is_supported_array_type(a): + raise TypeError( + "An array must be any of supported type, but got {}".format(type(a)) + ) - return _lu_factor(input) + sign, logdet = slogdet(a) + return sign * dpnp.exp(logdet) def eig(x1): @@ -652,8 +675,8 @@ def slogdet(a): if a.size == 0: # empty batch (result is empty, too) or empty matrices det([[]]) == 1 - sign = dpnp.ones(shape, res_type) - logdet = dpnp.zeros(shape, logdet_dtype) + sign = dpnp.ones(shape, dtype=res_type) + logdet = dpnp.zeros(shape, dtype=logdet_dtype) return sign, logdet lu, ipiv, dev_info = _lu_factor(a, res_type) From 6c20debd83f12e469461887b170e635162652da9 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 27 Nov 2023 13:00:27 +0100 Subject: [PATCH 13/55] Add new cupy tests for dpnp.linalg.det --- tests/test_linalg.py | 16 ++-- .../cupy/linalg_tests/test_norms.py | 76 +++++++++++++++++++ 2 files changed, 84 insertions(+), 8 deletions(-) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 89361795a6e2..2a78c3147f17 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -106,18 +106,18 @@ def test_cond(arr, p): [[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]], - # [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], - # [ - # [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], - # [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], - # ], + [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], + [ + [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], + [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], + ], ], ids=[ "[[0, 0], [0, 0]]", "[[1, 2], [1, 2]]", "[[1, 2], [3, 4]]", 
- # "[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]", - # "[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]", + "[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]", + "[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]", ], ) def test_det(array): @@ -128,7 +128,7 @@ def test_det(array): assert_allclose(expected, result) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") +# @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_det_empty(): a = numpy.empty((0, 0, 2, 2), dtype=numpy.float32) ia = inp.array(a) diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index 71c8922a23ea..e23409d877f0 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -7,6 +7,82 @@ from tests.third_party.cupy import testing +class TestDet(unittest.TestCase): + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det(self, xp, dtype): + a = testing.shaped_arange((2, 2), xp, dtype) + 1 + return xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det_3(self, xp, dtype): + a = testing.shaped_arange((2, 2, 2), xp, dtype) + 1 + return xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det_4(self, xp, dtype): + a = testing.shaped_arange((2, 2, 2, 2), xp, dtype) + 1 + return xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det_empty_batch(self, xp, dtype): + a = xp.empty((2, 0, 3, 3), dtype) + return xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det_empty_matrix(self, xp, dtype): + a = xp.empty((0, 0), dtype) + return xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det_empty_matrices(self, xp, dtype): + a = xp.empty((2, 3, 0, 0), dtype) + return xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + def test_det_different_last_two_dims(self, dtype): + for xp in (numpy, cupy): + a = testing.shaped_arange((2, 3, 2), xp, dtype) + # TODO: replace ValueError with dpnp.linalg.LinAlgError + with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + def test_det_different_last_two_dims_empty_batch(self, dtype): + for xp in (numpy, cupy): + a = xp.empty((0, 3, 2), dtype=dtype) + # TODO: replace ValueError with dpnp.linalg.LinAlgError + with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + def test_det_one_dim(self, dtype): + for xp in (numpy, cupy): + a = testing.shaped_arange((2,), xp, dtype) + # TODO: replace ValueError with dpnp.linalg.LinAlgError + with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + def test_det_zero_dim(self, dtype): + for xp in (numpy, cupy): + a = testing.shaped_arange((), xp, dtype) + # TODO: replace ValueError with dpnp.linalg.LinAlgError + with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def 
test_det_singular(self, xp, dtype): + a = xp.zeros((2, 3, 3), dtype=dtype) + return xp.linalg.det(a) + + class TestSlogdet(unittest.TestCase): @testing.for_dtypes("fd") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) From 3359c05cb413af7ed039f9456a7394bfdc596b0e Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 27 Nov 2023 16:13:30 +0100 Subject: [PATCH 14/55] Add ipiv_vecs and dev_info_vecs in _lu_factor for the batch case --- dpnp/linalg/dpnp_iface_linalg.py | 8 ++++- dpnp/linalg/dpnp_utils_linalg.py | 35 ++++++++++--------- .../cupy/linalg_tests/test_norms.py | 5 +++ 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 27bd09878926..e764a870b993 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -695,7 +695,13 @@ def slogdet(a): logdet = dpnp.log(dpnp.abs(diag)).sum(axis=-1) # ipiv is 1-origin - non_zero = dpnp.count_nonzero(ipiv != dpnp.arange(1, n + 1), axis=-1) + non_zero = dpnp.count_nonzero( + ipiv + != dpnp.arange( + 1, n + 1, usm_type=ipiv.usm_type, sycl_queue=ipiv.sycl_queue + ), + axis=-1, + ) if res_type.kind == "f": non_zero += dpnp.count_nonzero(diag < 0, axis=-1) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 435263701086..1e2081802928 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -217,18 +217,9 @@ def _lu_factor(a, res_type): a_usm_arr = dpnp.get_usm_ndarray(a) a_vecs = [None] * batch_size - ipiv_h = dpnp.empty( - (batch_size, n), - dtype=dpnp.int64, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - dev_info_h = dpnp.empty( - (batch_size,), - dtype=dpnp.int64, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) + ipiv_vecs = [None] * batch_size + dev_info_vecs = [None] * batch_size + a_ht_copy_ev = [None] * batch_size ht_lapack_ev = [None] * batch_size @@ -239,13 +230,25 @@ def _lu_factor(a, res_type): dst=a_vecs[i].get_array(), sycl_queue=a_sycl_queue, ) + ipiv_vecs[i] = dpnp.empty( + (n,), + dtype=dpnp.int64, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + dev_info_vecs[i] = dpnp.empty( + (1,), + dtype=dpnp.int64, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) ht_lapack_ev[i], _ = li._getrf( a_sycl_queue, n, a_vecs[i].get_array(), - ipiv_h[i].get_array(), - dev_info_h[i].get_array(), + ipiv_vecs[i].get_array(), + dev_info_vecs[i].get_array(), [a_copy_ev], ) @@ -254,8 +257,8 @@ def _lu_factor(a, res_type): a_ht_copy_ev[i].wait() out_v = dpnp.array(a_vecs, order=a_order).reshape(orig_shape) - out_ipiv = ipiv_h.reshape(orig_shape[:-1]) - out_dev_info = dev_info_h.reshape(orig_shape[:-2]) + out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) + out_dev_info = dpnp.array(dev_info_vecs).reshape(orig_shape[:-2]) return (out_v, out_ipiv, out_dev_info) diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index e23409d877f0..4c0449019a14 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -4,6 +4,7 @@ import pytest import dpnp as cupy +from tests.helper import is_cpu_device from tests.third_party.cupy import testing @@ -105,6 +106,10 @@ def test_slogdet_4(self, xp, dtype): sign, logdet = xp.linalg.slogdet(a) return sign, logdet + # TODO: remove it + @pytest.mark.skipif( + is_cpu_device(), reason="Need to add a logic for this case" + ) @testing.for_dtypes("fd") @testing.numpy_cupy_allclose(rtol=1e-3, 
atol=1e-4) def test_slogdet_singular(self, xp, dtype): From 1a913856a3f7a7e21174b183a7719d2459d1c1e9 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 27 Nov 2023 16:25:02 +0100 Subject: [PATCH 15/55] Skip test_det on CPU due to bug in MKL --- tests/test_linalg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 2a78c3147f17..1631f22e1364 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -5,7 +5,7 @@ import dpnp as inp -from .helper import get_all_dtypes, get_complex_dtypes, has_support_aspect64 +from .helper import get_all_dtypes, has_support_aspect64, is_cpu_device def vvsort(val, vec, size, xp): @@ -100,6 +100,7 @@ def test_cond(arr, p): assert_array_equal(expected, result) +@pytest.mark.skipif(is_cpu_device(), reason="MKL bug MKLD-16626") @pytest.mark.parametrize( "array", [ @@ -128,7 +129,6 @@ def test_det(array): assert_allclose(expected, result) -# @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_det_empty(): a = numpy.empty((0, 0, 2, 2), dtype=numpy.float32) ia = inp.array(a) From b8607909044529f494c92cd655dc897ee4dd608f Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 28 Nov 2023 12:25:16 +0100 Subject: [PATCH 16/55] Small update of cupy tests in test_norms.py --- tests/third_party/cupy/linalg_tests/test_norms.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index 4c0449019a14..ea6f3ec3941e 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -30,19 +30,19 @@ def test_det_4(self, xp, dtype): @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_empty_batch(self, xp, dtype): - a = xp.empty((2, 0, 3, 3), dtype) + a = xp.empty((2, 0, 3, 3), dtype=dtype) return xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_empty_matrix(self, xp, dtype): - a = xp.empty((0, 0), dtype) + a = xp.empty((0, 0), dtype=dtype) return xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_empty_matrices(self, xp, dtype): - a = xp.empty((2, 3, 0, 0), dtype) + a = xp.empty((2, 3, 0, 0), dtype=dtype) return xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) @@ -77,6 +77,8 @@ def test_det_zero_dim(self, dtype): with pytest.raises((numpy.linalg.LinAlgError, ValueError)): xp.linalg.det(a) + # TODO: remove skipif when MKLD-16626 is resolved + @pytest.mark.skipif(is_cpu_device(), reason="MKL bug MKLD-16626") @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_singular(self, xp, dtype): @@ -106,10 +108,8 @@ def test_slogdet_4(self, xp, dtype): sign, logdet = xp.linalg.slogdet(a) return sign, logdet - # TODO: remove it - @pytest.mark.skipif( - is_cpu_device(), reason="Need to add a logic for this case" - ) + # TODO: remove skipif when MKLD-16626 is resolved + @pytest.mark.skipif(is_cpu_device(), reason="MKL bug MKLD-16626") @testing.for_dtypes("fd") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet_singular(self, xp, dtype): From 37d476bcb654ccc5c772ccd4a9434687b75e41a5 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 29 Nov 2023 15:29:22 +0100 Subject: [PATCH 17/55] Add support of complex dtype 
for dpnp.diagonal and update test_diagonal --- dpnp/backend/kernels/dpnp_krnl_indexing.cpp | 4 ++++ tests/test_indexing.py | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp index 2cca84f9e61f..eceadd7d9650 100644 --- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp @@ -950,6 +950,10 @@ void func_map_init_indexing_func(func_map_t &fmap) eft_FLT, (void *)dpnp_diagonal_ext_c}; fmap[DPNPFuncName::DPNP_FN_DIAGONAL_EXT][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_diagonal_ext_c}; + fmap[DPNPFuncName::DPNP_FN_DIAGONAL_EXT][eft_C64][eft_C64] = { + eft_C64, (void *)dpnp_diagonal_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_DIAGONAL_EXT][eft_C128][eft_C128] = { + eft_C128, (void *)dpnp_diagonal_ext_c>}; fmap[DPNPFuncName::DPNP_FN_FILL_DIAGONAL][eft_INT][eft_INT] = { eft_INT, (void *)dpnp_fill_diagonal_default_c}; diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 03541dc2d55d..33d3746f19b1 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -77,6 +77,7 @@ def test_choose(): assert_array_equal(expected, result) +@pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_bool=True)) @pytest.mark.parametrize("offset", [0, 1], ids=["0", "1"]) @pytest.mark.parametrize( "array", @@ -112,8 +113,8 @@ def test_choose(): "[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], [[[13, 14, 15], [16, 17, 18]], [[19, 20, 21], [22, 23, 24]]]]", ], ) -def test_diagonal(array, offset): - a = numpy.array(array) +def test_diagonal(array, arr_dtype, offset): + a = numpy.array(array, dtype=arr_dtype) ia = dpnp.array(a) expected = numpy.diagonal(a, offset) result = dpnp.diagonal(ia, offset) From 2e8b4fe35a6a70a2a5a83436a2e76b5c80a0fc37 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 29 Nov 2023 16:05:34 +0100 Subject: [PATCH 18/55] lu_factor func returns the result of LU decomposition as c-contiguous and add explanatory comments --- dpnp/backend/extensions/lapack/getrf.cpp | 12 +++---- dpnp/linalg/dpnp_utils_linalg.py | 42 +++++++++++------------- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index b7d830f63c24..a83d94ac5069 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -84,13 +84,13 @@ static sycl::event getrf_impl(sycl::queue exec_q, getrf_event = oneapi::mkl::lapack::getrf( exec_q, - n, // The order of the matrix A; (0 ≤ n). - n, // The order of the matrix A; (0 ≤ n). - a, // Pointer to the n-by-n coefficient matrix A. - lda, // The leading dimension of a. - ipiv, + n, // Order of the square matrix; (0 ≤ n). + n, // Order of the square matrix; (0 ≤ n). + a, // Pointer to the n-by-n matrix. + lda, // The leading dimension of `a`. + ipiv, // Pointer to the array of pivot indices. scratchpad, // Pointer to scratchpad memory to be used by MKL - // routine for storing intermediate results + // routine for storing intermediate results. 
scratchpad_size, depends); } catch (mkl_lapack::exception const &e) { error_msg diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 1e2081802928..1139ea5ab121 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -204,7 +204,6 @@ def _lu_factor(a, res_type): if m != n: raise ValueError("Last 2 dimensions of the input array must be square") - a_order = "C" if a.flags.c_contiguous else "F" a_sycl_queue = a.sycl_queue a_usm_type = a.usm_type @@ -212,19 +211,22 @@ def _lu_factor(a, res_type): if a.ndim > 2: orig_shape = a.shape + # get 3d input arrays by reshape a = a.reshape(-1, n, n) batch_size = a.shape[0] a_usm_arr = dpnp.get_usm_ndarray(a) + # Initialize lists for storing arrays and events for each batch a_vecs = [None] * batch_size ipiv_vecs = [None] * batch_size dev_info_vecs = [None] * batch_size - a_ht_copy_ev = [None] * batch_size ht_lapack_ev = [None] * batch_size + # Process each batch for i in range(batch_size): - a_vecs[i] = dpnp.empty_like(a[i], order="F", dtype=res_type) + # Copy each 2D slice to a new array as getrf destroys the input matrix + a_vecs[i] = dpnp.empty_like(a[i], order="C", dtype=res_type) a_ht_copy_ev[i], a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=a_usm_arr[i], dst=a_vecs[i].get_array(), @@ -243,6 +245,8 @@ def _lu_factor(a, res_type): sycl_queue=a_sycl_queue, ) + # Call the LAPACK extension function _getrf + # to perform LU decomposition on each batch in 'a_vecs[i]' ht_lapack_ev[i], _ = li._getrf( a_sycl_queue, n, @@ -256,7 +260,8 @@ def _lu_factor(a, res_type): ht_lapack_ev[i].wait() a_ht_copy_ev[i].wait() - out_v = dpnp.array(a_vecs, order=a_order).reshape(orig_shape) + # Reshape the results back to their original shape + out_v = dpnp.array(a_vecs, order="C").reshape(orig_shape) out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) out_dev_info = dpnp.array(dev_info_vecs).reshape(orig_shape[:-2]) @@ -265,7 +270,8 @@ def _lu_factor(a, res_type): else: a_usm_arr = dpnp.get_usm_ndarray(a) - a_h = dpnp.empty_like(a, order="F", dtype=res_type) + # `a`` must be copied because getrf destroys the input matrix + a_h = dpnp.empty_like(a, order="C", dtype=res_type) ipiv_h = dpnp.empty( n, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) @@ -273,11 +279,15 @@ def _lu_factor(a, res_type): 1, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) + # use DPCTL tensor function to fill the сopy of the input array + # from the input array a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue ) - ht_lapack_ev, lapack_ev = li._getrf( + # Call the LAPACK extension function _getrf + # to perform LU decomposition on the input matrix + ht_lapack_ev, _ = li._getrf( a_sycl_queue, n, a_h.get_array(), @@ -286,22 +296,10 @@ def _lu_factor(a, res_type): [a_copy_ev], ) - if a_order != "F": - # need to align order of the result of solutions with the - # input array of multiple dependent variables - a_h_f = dpnp.empty_like(a_h, order=a_order) - ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_h.get_array(), - dst=a_h_f.get_array(), - sycl_queue=a_sycl_queue, - depends=[lapack_ev], - ) - ht_copy_out_ev.wait() - out_v = (a_h_f, ipiv_h, dev_info_h) - else: - out_v = (a_h, ipiv_h, dev_info_h) - ht_lapack_ev.wait() a_ht_copy_ev.wait() - return out_v + # Return a tuple containing the factorized matrix 'a_h', + # pivot indices 'ipiv_h' + # and the status 'dev_info_h' from the LAPACK getrf call + 
return (a_h, ipiv_h, dev_info_h) From 3849d92189b3c5587a2e204a0a6c7aa52e0529af Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 29 Nov 2023 18:40:19 +0100 Subject: [PATCH 19/55] Add getrf_batch MKL extension --- dpnp/backend/extensions/lapack/CMakeLists.txt | 1 + dpnp/backend/extensions/lapack/getrf.hpp | 12 + .../backend/extensions/lapack/getrf_batch.cpp | 220 ++++++++++++++++++ dpnp/backend/extensions/lapack/lapack_py.cpp | 11 +- .../extensions/lapack/types_matrix.hpp | 28 ++- dpnp/linalg/dpnp_utils_linalg.py | 119 +++++++--- 6 files changed, 353 insertions(+), 38 deletions(-) create mode 100644 dpnp/backend/extensions/lapack/getrf_batch.cpp diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt index a4ff161d0edc..fa6085895088 100644 --- a/dpnp/backend/extensions/lapack/CMakeLists.txt +++ b/dpnp/backend/extensions/lapack/CMakeLists.txt @@ -28,6 +28,7 @@ set(python_module_name _lapack_impl) set(_module_src ${CMAKE_CURRENT_SOURCE_DIR}/lapack_py.cpp ${CMAKE_CURRENT_SOURCE_DIR}/getrf.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/getrf_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/heevd.cpp ${CMAKE_CURRENT_SOURCE_DIR}/syevd.cpp ) diff --git a/dpnp/backend/extensions/lapack/getrf.hpp b/dpnp/backend/extensions/lapack/getrf.hpp index 2b5d7c18ce47..e66edefc52cd 100644 --- a/dpnp/backend/extensions/lapack/getrf.hpp +++ b/dpnp/backend/extensions/lapack/getrf.hpp @@ -46,7 +46,19 @@ extern std::pair dpctl::tensor::usm_ndarray dev_info_array, const std::vector &depends = {}); +extern std::pair + getrf_batch(sycl::queue exec_q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + dpctl::tensor::usm_ndarray dev_info_array, + std::int64_t n, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size, + const std::vector &depends = {}); + extern void init_getrf_dispatch_vector(void); +extern void init_getrf_batch_dispatch_vector(void); } // namespace lapack } // namespace ext } // namespace backend diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp new file mode 100644 index 000000000000..b552358965d1 --- /dev/null +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -0,0 +1,220 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "getrf.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*getrf_batch_impl_fn_ptr_t)( + sycl::queue, + std::int64_t, + char *, + std::int64_t, + std::int64_t, + std::int64_t *, + std::int64_t, + std::int64_t, + std::int64_t *, + std::vector &, + const std::vector &); + +static getrf_batch_impl_fn_ptr_t + getrf_batch_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event getrf_batch_impl(sycl::queue exec_q, + std::int64_t n, + char *in_a, + std::int64_t lda, + std::int64_t stride_a, + std::int64_t *ipiv, + std::int64_t stride_ipiv, + std::int64_t batch_size, + std::int64_t *dev_info, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + + const std::int64_t scratchpad_size = + oneapi::mkl::lapack::getrf_batch_scratchpad_size( + exec_q, n, n, lda, stride_a, stride_ipiv, batch_size); + T *scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + + sycl::event getrf_batch_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + getrf_batch_event = oneapi::mkl::lapack::getrf_batch( + exec_q, + n, // Order of each square matrix in the batch; (0 ≤ n). + n, // Order of each square matrix in the batch; (0 ≤ n). + a, // Pointer to the batch of matrices. + lda, // The leading dimension of `a`. + stride_a, // Stride between matrices: Element spacing between + // matrices in `a`. + ipiv, // Pivot indices: Pointer to the pivot indices for each + // matrix. + stride_ipiv, // Stride between pivot indices: Spacing between pivot + // arrays in 'ipiv'. + batch_size, // Total number of matrices in the batch. + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results. 
+ scratchpad_size, depends); + } catch (mkl_lapack::exception const &e) { + error_msg + << "Unexpected MKL exception caught during getrf() call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + info = e.info(); + } catch (sycl::exception const &e) { + error_msg << "Unexpected SYCL exception caught during getrf() call:\n" + << e.what(); + info = -1; + } + + if (info != 0) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + + if (info < 0) { + throw std::runtime_error(error_msg.str()); + } + } + + sycl::event write_info_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(getrf_batch_event); + cgh.single_task([=]() { dev_info[0] = info; }); + }); + + host_task_events.push_back(write_info_event); + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(getrf_batch_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + return getrf_batch_event; +} + +std::pair + getrf_batch(sycl::queue q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + dpctl::tensor::usm_ndarray dev_info_array, + std::int64_t n, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size, + const std::vector &depends) +{ + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + + getrf_batch_impl_fn_ptr_t getrf_batch_fn = + getrf_batch_dispatch_vector[a_array_type_id]; + if (getrf_batch_fn == nullptr) { + throw py::value_error( + "No getrf_batch implementation defined for the provided type " + "of the input matrix."); + } + + char *a_array_data = a_array.get_data(); + const std::int64_t lda = std::max(1UL, n); + + // check valid ipiv + + char *ipiv_array_data = ipiv_array.get_data(); + std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data); + + char *dev_info_array_data = dev_info_array.get_data(); + std::int64_t *d_dev_info = + reinterpret_cast(dev_info_array_data); + + std::vector host_task_events; + sycl::event getrf_batch_ev = + getrf_batch_fn(q, n, a_array_data, lda, stride_a, d_ipiv, stride_ipiv, + batch_size, d_dev_info, host_task_events, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive( + q, {a_array, ipiv_array, dev_info_array}, host_task_events); + + return std::make_pair(args_ev, getrf_batch_ev); +} + +template +struct GetrfBatchContigFactory +{ + fnT get() + { + if constexpr (types::GetrfBatchTypePairSupportFactory::is_defined) { + return getrf_batch_impl; + } + else { + return nullptr; + } + } +}; + +void init_getrf_batch_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(getrf_batch_dispatch_vector); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp index 38320d205aa3..c504f9dbd9f8 100644 --- a/dpnp/backend/extensions/lapack/lapack_py.cpp +++ b/dpnp/backend/extensions/lapack/lapack_py.cpp @@ -40,8 +40,9 @@ namespace py = pybind11; // populate dispatch vectors void init_dispatch_vectors(void) { - lapack_ext::init_syevd_dispatch_vector(); + lapack_ext::init_getrf_batch_dispatch_vector(); lapack_ext::init_getrf_dispatch_vector(); + lapack_ext::init_syevd_dispatch_vector(); } // populate dispatch tables @@ -75,4 +76,12 @@ PYBIND11_MODULE(_lapack_impl, m) 
py::arg("sycl_queue"), py::arg("n"), py::arg("a_array"), py::arg("ipiv_array"), py::arg("dev_info_array"), py::arg("depends") = py::list()); + + m.def("_getrf_batch", &lapack_ext::getrf_batch, + "Call `getrf_batch` from OneMKL LAPACK library to return " + "the LU factorization of a batch of general n x n matrices", + py::arg("sycl_queue"), py::arg("a_array"), py::arg("ipiv_array"), + py::arg("dev_info_array"), py::arg("n"), py::arg("stride_a"), + py::arg("stride_ipiv"), py::arg("batch_size"), + py::arg("depends") = py::list()); } diff --git a/dpnp/backend/extensions/lapack/types_matrix.hpp b/dpnp/backend/extensions/lapack/types_matrix.hpp index b5276e5f1318..8216a61f842d 100644 --- a/dpnp/backend/extensions/lapack/types_matrix.hpp +++ b/dpnp/backend/extensions/lapack/types_matrix.hpp @@ -86,7 +86,7 @@ struct SyevdTypePairSupportFactory * MKL LAPACK library provides support in oneapi::mkl::lapack::getrf * function. * - * @tparam T Type of array containing input matrix A, + * @tparam T Type of array containing input matrix, * as well as the output array for storing the LU factorization. */ template @@ -106,6 +106,32 @@ struct GetrfTypePairSupportFactory // fall-through dpctl_td_ns::NotDefinedEntry>::is_defined; }; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::getrf_batch + * function. + * + * @tparam T Type of array containing input matrix, + * as well as the output array for storing the LU factorization. + */ +template +struct GetrfBatchTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; } // namespace types } // namespace lapack } // namespace ext diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 1139ea5ab121..27a00c7de1af 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -207,7 +207,8 @@ def _lu_factor(a, res_type): a_sycl_queue = a.sycl_queue a_usm_type = a.usm_type - # TODO: use getrf_batch + # TODO: Find out at which array sizes the best performance is obtained + use_batch = True if a.ndim > 2: orig_shape = a.shape @@ -216,61 +217,107 @@ def _lu_factor(a, res_type): batch_size = a.shape[0] a_usm_arr = dpnp.get_usm_ndarray(a) - # Initialize lists for storing arrays and events for each batch - a_vecs = [None] * batch_size - ipiv_vecs = [None] * batch_size - dev_info_vecs = [None] * batch_size - a_ht_copy_ev = [None] * batch_size - ht_lapack_ev = [None] * batch_size - - # Process each batch - for i in range(batch_size): - # Copy each 2D slice to a new array as getrf destroys the input matrix - a_vecs[i] = dpnp.empty_like(a[i], order="C", dtype=res_type) - a_ht_copy_ev[i], a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr[i], - dst=a_vecs[i].get_array(), - sycl_queue=a_sycl_queue, - ) - ipiv_vecs[i] = dpnp.empty( - (n,), + if use_batch: + # `a` must be copied because getrf_batch destroys the input matrix + a_h = dpnp.empty_like(a, order="C", dtype=res_type) + ipiv_h = dpnp.empty( + (batch_size, n), dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - dev_info_vecs[i] = dpnp.empty( - (1,), + dev_info_h = dpnp.empty( + 1, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - # Call the LAPACK 
extension function _getrf - # to perform LU decomposition on each batch in 'a_vecs[i]' - ht_lapack_ev[i], _ = li._getrf( + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue + ) + + ipiv_stride = n + a_stride = a_h.strides[0] + + # Call the LAPACK extension function _getrf_batch + # to perform LU decomposition of a batch of general matrices + ht_lapack_ev, _ = li._getrf_batch( a_sycl_queue, + a_h.get_array(), + ipiv_h.get_array(), + dev_info_h.get_array(), n, - a_vecs[i].get_array(), - ipiv_vecs[i].get_array(), - dev_info_vecs[i].get_array(), + a_stride, + ipiv_stride, + batch_size, [a_copy_ev], ) - for i in range(batch_size): - ht_lapack_ev[i].wait() - a_ht_copy_ev[i].wait() + ht_lapack_ev.wait() + a_ht_copy_ev.wait() - # Reshape the results back to their original shape - out_v = dpnp.array(a_vecs, order="C").reshape(orig_shape) - out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) - out_dev_info = dpnp.array(dev_info_vecs).reshape(orig_shape[:-2]) + return (a_h, ipiv_h, dev_info_h) - return (out_v, out_ipiv, out_dev_info) + else: + # Initialize lists for storing arrays and events for each batch + a_vecs = [None] * batch_size + ipiv_vecs = [None] * batch_size + dev_info_vecs = [None] * batch_size + a_ht_copy_ev = [None] * batch_size + ht_lapack_ev = [None] * batch_size + + # Process each batch + for i in range(batch_size): + # Copy each 2D slice to a new array as getrf destroys the input matrix + a_vecs[i] = dpnp.empty_like(a[i], order="C", dtype=res_type) + ( + a_ht_copy_ev[i], + a_copy_ev, + ) = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr[i], + dst=a_vecs[i].get_array(), + sycl_queue=a_sycl_queue, + ) + ipiv_vecs[i] = dpnp.empty( + (n,), + dtype=dpnp.int64, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + dev_info_vecs[i] = dpnp.empty( + (1,), + dtype=dpnp.int64, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + + # Call the LAPACK extension function _getrf + # to perform LU decomposition on each batch in 'a_vecs[i]' + ht_lapack_ev[i], _ = li._getrf( + a_sycl_queue, + n, + a_vecs[i].get_array(), + ipiv_vecs[i].get_array(), + dev_info_vecs[i].get_array(), + [a_copy_ev], + ) + + for i in range(batch_size): + ht_lapack_ev[i].wait() + a_ht_copy_ev[i].wait() + + # Reshape the results back to their original shape + out_v = dpnp.array(a_vecs, order="C").reshape(orig_shape) + out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) + out_dev_info = dpnp.array(dev_info_vecs).reshape(orig_shape[:-2]) + + return (out_v, out_ipiv, out_dev_info) else: a_usm_arr = dpnp.get_usm_ndarray(a) - # `a`` must be copied because getrf destroys the input matrix + # `a` must be copied because getrf destroys the input matrix a_h = dpnp.empty_like(a, order="C", dtype=res_type) ipiv_h = dpnp.empty( n, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue From cd282a38a7ead6478ed54b39291fcc3af7cb7f70 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 30 Nov 2023 10:56:42 +0100 Subject: [PATCH 20/55] Update docstring for slogdet --- dpnp/linalg/dpnp_iface_linalg.py | 83 +++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 24 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index e764a870b993..cbc742d72440 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -606,38 +606,69 @@ def svd(x1, full_matrices=True, compute_uv=True, hermitian=False): def slogdet(a): - """Returns sign and logarithm of the 
determinant of an array. + """ + Returns sign and logarithm of the determinant of an array. It calculates the natural logarithm of the determinant of a given value. - Args: + For full documentation refer to :obj:`numpy.linalg.slogdet`. + + Args + ---- a (dpnp.ndarray): The input matrix with dimension ``(..., N, N)``. - Returns: - tuple of :class:`~dpnp.ndarray`: - It returns a tuple ``(sign, logdet)``. ``sign`` represents each - sign of the determinant as a real number ``0``, ``1`` or ``-1``. - 'logdet' represents the natural logarithm of the absolute of the + Returns + ------- + out : tuple of :class:`~dpnp.ndarray`: + It returns a tuple ``(sign, logdet)``. + ``sign`` represents each sign of the determinant as a real number + ``0``, ``1`` or ``-1``. + ``logdet`` represents the natural logarithm of the absolute of the determinant. If the determinant is zero, ``sign`` will be ``0`` and ``logdet`` will be ``-inf``. The shapes of both ``sign`` and ``logdet`` are equal to ``a.shape[:-2]``. - .. warning:: - This function calls one or more cuSOLVER routine(s) which may yield - invalid results if input conditions are not met. - To detect these invalid results, you can set the `linalg` - configuration to a value that is not `ignore` in - :func:`cupyx.errstate` or :func:`cupyx.seterr`. + Limitations + ----------- + Parameter `a` is supported as :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. + Input array data types are limited by supported DPNP :ref:`Data types`. + + See Also + -------- + :obj:`dpnp.det` : Returns the determinant of an array. + + Examples + -------- + The determinant of a 2-D array [[a, b], [c, d]] is ad - bc: + + >>> import dpnp as dp + >>> a = dp.array([[1, 2], [3, 4]]) + >>> (sign, logabsdet) = dp.linalg.slogdet(a) + >>> (sign, logabsdet) + (array(-1.), array(0.69314718)) + >>> sign * dp.exp(logabsdet) + array(-2.) + + Computing log-determinants for a stack of matrices: - .. warning:: - To produce the same results as :func:`numpy.linalg.slogdet` for - singular inputs, set the `linalg` configuration to `raise`. + >>> a = dp.array([ [[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]] ]) + >>> a.shape + (3, 2, 2) + >>> sign, logabsdet = dp.linalg.slogdet(a) + >>> (sign, logabsdet) + (array([-1., -1., -1.]), array([0.69314718, 1.09861229, 2.07944154])) + >>> sign * dp.exp(logabsdet) + array([-2., -3., -8.]) - .. seealso:: :func:`numpy.linalg.slogdet` """ + if not dpnp.is_supported_array_type(a): + raise TypeError( + "An array must be any of supported type, but got {}".format(type(a)) + ) + # TODO: use dpnp.linalg.LinAlgError if a.ndim < 2: raise ValueError( @@ -651,6 +682,8 @@ def slogdet(a): raise ValueError("Last 2 dimensions of the input array must be square") exec_q = a.sycl_queue + a_usm_type = a.usm_type + a_sycl_queue = a.sycl_queue # dtype, sign_dtype = _util.linalg_common_type(a) # TODO: Use linalg_common_type from #1598 if dpnp.issubdtype(a.dtype, dpnp.floating): @@ -675,17 +708,19 @@ def slogdet(a): if a.size == 0: # empty batch (result is empty, too) or empty matrices det([[]]) == 1 - sign = dpnp.ones(shape, dtype=res_type) - logdet = dpnp.zeros(shape, dtype=logdet_dtype) + sign = dpnp.ones( + shape, dtype=res_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + logdet = dpnp.zeros( + shape, + dtype=logdet_dtype, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) return sign, logdet lu, ipiv, dev_info = _lu_factor(a, res_type) - # dev_info < 0 means illegal value (in dimensions, strides, and etc.) 
that - # should never happen even if the matrix contains nan or inf. - # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for - # debugging purposes. - # Transposing 'lu' to swap the last two axes for compatibility # with 'dpnp.diagonal' as it does not support 'axis1' and 'axis2' arguments. # TODO: Replace with 'dpnp.diagonal(lu, axis1=-2, axis2=-1)' when supported. From 771605f018430dca77908d2423ba694bf6614127 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 30 Nov 2023 10:59:44 +0100 Subject: [PATCH 21/55] Add more tests --- tests/test_linalg.py | 96 +++++++++++++++++++++++++++++++++++++++- tests/test_sycl_queue.py | 43 ++++++++++++++++++ tests/test_usm_type.py | 31 +++++++++++++ 3 files changed, 169 insertions(+), 1 deletion(-) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 1631f22e1364..e1ce837a6cd2 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -1,7 +1,7 @@ import dpctl import numpy import pytest -from numpy.testing import assert_allclose, assert_array_equal +from numpy.testing import assert_allclose, assert_array_equal, assert_raises import dpnp as inp @@ -508,3 +508,97 @@ def test_svd(type, shape): assert_allclose( inp.asnumpy(dpnp_vt)[i, :], np_vt[i, :], rtol=tol, atol=tol ) + + +class TestSlogdet: + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_slogdet_2d(self, dtype): + a_np = numpy.array([[1, 2], [3, 4]], dtype=dtype) + a_dp = inp.array(a_np) + + sign_expected, logdet_expected = numpy.linalg.slogdet(a_np) + sign_result, logdet_result = inp.linalg.slogdet(a_dp) + + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_slogdet_3d(self, dtype): + a_np = numpy.array( + [ + [[1, 2], [3, 4]], + [[1, 2], [2, 1]], + [[1, 3], [3, 1]], + ], + dtype=dtype, + ) + a_dp = inp.array(a_np) + + sign_expected, logdet_expected = numpy.linalg.slogdet(a_np) + sign_result, logdet_result = inp.linalg.slogdet(a_dp) + + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + + def test_slogdet_strides(self): + a_np = numpy.array( + [ + [2, 3, 1, 4, 5], + [5, 6, 7, 8, 9], + [9, 7, 7, 2, 3], + [1, 4, 5, 1, 8], + [8, 9, 8, 5, 3], + ] + ) + + a_dp = inp.array(a_np) + + # positive strides + sign_expected, logdet_expected = numpy.linalg.slogdet(a_np[::2, ::2]) + sign_result, logdet_result = inp.linalg.slogdet(a_dp[::2, ::2]) + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + + # negative strides + sign_expected, logdet_expected = numpy.linalg.slogdet(a_np[::-2, ::-2]) + sign_result, logdet_result = inp.linalg.slogdet(a_dp[::-2, ::-2]) + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + + # TODO: remove skipif when MKLD-16626 is resolved + @pytest.mark.skipif(is_cpu_device(), reason="MKL bug MKLD-16626") + @pytest.mark.parametrize( + "matrix", + [ + [[1, 2], [2, 4]], + [[0, 0], [0, 0]], + [[1, 1], [1, 1]], + [[2, 4], [1, 2]], + [[1, 2], [0, 0]], + [[1, 0], [2, 0]], + ], + ids=[ + "Linearly dependent rows", + "Zero matrix", + "Identical rows", + "Linearly dependent columns", + "Zero row", + "Zero column", + ], + ) + def test_slogdet_singular_matrix(self, matrix): + a_np = numpy.array(matrix, dtype="float32") + a_dp = inp.array(a_np) + + sign_expected, logdet_expected = 
numpy.linalg.slogdet(a_np) + sign_result, logdet_result = inp.linalg.slogdet(a_dp) + + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + + def test_solve_errors(self): + a_dp = inp.array([[1, 2], [3, 5]], dtype="float32") + + # unsupported type + a_np = inp.asnumpy(a_dp) + assert_raises(TypeError, inp.linalg.slogdet, a_np) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 6de86cfd915f..5bd80e421518 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -1318,3 +1318,46 @@ def test_take(device): result_queue = result.get_array().sycl_queue assert_sycl_queue_equal(result_queue, expected_queue) + + +@pytest.mark.parametrize( + "shape, is_empty", + [ + ((2, 2), False), + ((3, 2, 2), False), + ((0, 0), True), + ((0, 2, 2), True), + ], + ids=[ + "(2, 2)", + "(3, 2, 2)", + "(0, 0)", + "(0, 2, 2)", + ], +) +@pytest.mark.parametrize( + "device", + valid_devices, + ids=[device.filter_string for device in valid_devices], +) +def test_slogdet(shape, is_empty, device): + if is_empty: + numpy_x = numpy.empty(shape, dtype=dpnp.default_float_type()) + else: + count_elem = numpy.prod(shape) + numpy_x = numpy.arange( + 1, count_elem + 1, dtype=dpnp.default_float_type() + ).reshape(shape) + + dpnp_x = dpnp.array(numpy_x, device=device) + + sign_result, logdet_result = dpnp.linalg.slogdet(dpnp_x) + sign_expected, logdet_expected = numpy.linalg.slogdet(numpy_x) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + assert_allclose(sign_expected, sign_result) + + sign_queue = sign_result.sycl_queue + logdet_queue = logdet_result.sycl_queue + + assert_sycl_queue_equal(sign_queue, dpnp_x.sycl_queue) + assert_sycl_queue_equal(logdet_queue, dpnp_x.sycl_queue) diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index bc0919643bd3..d51f3f2d1ef5 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -483,3 +483,34 @@ def test_take(usm_type_x, usm_type_ind): assert x.usm_type == usm_type_x assert ind.usm_type == usm_type_ind assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_ind]) + + +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize( + "shape, is_empty", + [ + ((2, 2), False), + ((3, 2, 2), False), + ((0, 0), True), + ((0, 2, 2), True), + ], + ids=[ + "(2, 2)", + "(3, 2, 2)", + "(0, 0)", + "(0, 2, 2)", + ], +) +def test_slogdet(shape, is_empty, usm_type): + if is_empty: + x = dp.empty(shape, dtype=dp.default_float_type(), usm_type=usm_type) + else: + count_elem = numpy.prod(shape) + x = dp.arange( + 1, count_elem + 1, dtype=dp.default_float_type(), usm_type=usm_type + ).reshape(shape) + + sign, logdet = dp.linalg.slogdet(x) + + assert x.usm_type == sign.usm_type + assert x.usm_type == logdet.usm_type From cde627da8ef1a737246e6c4b0127a311f79128c0 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 30 Nov 2023 13:13:58 +0100 Subject: [PATCH 22/55] Remove accidentally added file --- qwe.py | 39 --------------------------------------- 1 file changed, 39 deletions(-) delete mode 100644 qwe.py diff --git a/qwe.py b/qwe.py deleted file mode 100644 index ab03b4a3b5ad..000000000000 --- a/qwe.py +++ /dev/null @@ -1,39 +0,0 @@ -# import dpnp -# from dpnp.linalg. 
-# import numpy - -# a = numpy.arange(1,49,dtype='f4').reshape(2,2,3,4) -# a = numpy.arange(1,5,dtype='f4').reshape(2,2) -# a_dp = dpnp.array(a) -# print(numpy.linalg.svd(a,compute_uv=False)) -# print("===================") -# print(dpnp.linalg.svd(a_dp,compute_uv=False)) -# print("===================") -# a_dp = dpnp.array(a,device='cpu') -# print(dpnp.linalg.svd(a_dp,full_matrices=False)) - -# print(numpy.linalg.svd(a,full_matrices=False)) -# res_np = numpy.linalg.svd(a) -# print(res_np[0].shape) -# print(res_np[1].shape) -# print(res_np[2].shape) -# print("===================") -# print(dpnp.linalg.svd(a_dp, full_matrices=False)) -# res_dp = dpnp.linalg.svd(a_dp) -# print(res_dp[0].shape) -# print(res_dp[1].shape) -# print(res_dp[2].shape) -# print(a_dp) - - -from dpnp.linalg.dpnp_utils_linalg import _lu_factor -import dpnp -import numpy -import scipy - -a = numpy.array([[1, 2], [3, 4]],dtype="f4",order='C') -a_dp = dpnp.array(a) - -print(scipy.linalg.lu_factor(a)) -print("===========================") -print(_lu_factor(a_dp)) From 694870bf4255d9e92439efadd0f3602291b60b79 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 30 Nov 2023 13:15:32 +0100 Subject: [PATCH 23/55] Modify sign parameter calculation --- dpnp/linalg/dpnp_iface_linalg.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index cbc742d72440..2a03818b7072 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -721,6 +721,8 @@ def slogdet(a): lu, ipiv, dev_info = _lu_factor(a, res_type) + # return lu, ipiv, dev_info + # Transposing 'lu' to swap the last two axes for compatibility # with 'dpnp.diagonal' as it does not support 'axis1' and 'axis2' arguments. # TODO: Replace with 'dpnp.diagonal(lu, axis1=-2, axis2=-1)' when supported. 
@@ -740,13 +742,13 @@ def slogdet(a): if res_type.kind == "f": non_zero += dpnp.count_nonzero(diag < 0, axis=-1) - sign = -(1 ** (non_zero % 2)) + sign = (non_zero % 2) * -2 + 1 if res_type.kind == "c": sign = sign * dpnp.prod(diag / dpnp.abs(diag), axis=-1) sign = sign.astype(res_type) logdet = logdet.astype(logdet_dtype, copy=False) - singular = dev_info > 0 + singular = dpnp.array([dev_info > 0]) return ( dpnp.where(singular, res_type.type(0), sign).reshape(shape), dpnp.where(singular, logdet_dtype.type("-inf"), logdet).reshape(shape), From d85d00db79d4cfac0e1c4a134d323519b5fd1896 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 30 Nov 2023 13:24:20 +0100 Subject: [PATCH 24/55] Remove the old backend implementation of dpnp_det --- dpnp/backend/include/dpnp_iface_fptr.hpp | 2 -- dpnp/backend/kernels/dpnp_krnl_linalg.cpp | 9 ----- dpnp/dpnp_algo/dpnp_algo.pxd | 1 - dpnp/linalg/dpnp_algo_linalg.pyx | 42 ----------------------- 4 files changed, 54 deletions(-) diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index c56f38ffcb5e..7ab6852685a2 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -121,8 +121,6 @@ enum class DPNPFuncName : size_t DPNP_FN_DEGREES_EXT, /**< Used in numpy.degrees() impl, requires extra parameters */ DPNP_FN_DET, /**< Used in numpy.linalg.det() impl */ - DPNP_FN_DET_EXT, /**< Used in numpy.linalg.det() impl, requires extra - parameters */ DPNP_FN_DIAG, /**< Used in numpy.diag() impl */ DPNP_FN_DIAG_INDICES, /**< Used in numpy.diag_indices() impl */ DPNP_FN_DIAG_INDICES_EXT, /**< Used in numpy.diag_indices() impl, requires diff --git a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp index 5bf54c2b84d3..b3dd23f3c0bc 100644 --- a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp @@ -866,15 +866,6 @@ void func_map_init_linalg_func(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_DET][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_det_default_c}; - fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_det_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_det_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_det_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_det_ext_c}; - fmap[DPNPFuncName::DPNP_FN_INV][eft_INT][eft_INT] = { eft_DBL, (void *)dpnp_inv_default_c}; fmap[DPNPFuncName::DPNP_FN_INV][eft_LNG][eft_LNG] = { diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 80c6035d7a9f..3b0699231538 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -55,7 +55,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_DEGREES DPNP_FN_DEGREES_EXT DPNP_FN_DET - DPNP_FN_DET_EXT DPNP_FN_DIAG_INDICES DPNP_FN_DIAG_INDICES_EXT DPNP_FN_DIAGONAL diff --git a/dpnp/linalg/dpnp_algo_linalg.pyx b/dpnp/linalg/dpnp_algo_linalg.pyx index c86b869acd3c..ff5ea6253db6 100644 --- a/dpnp/linalg/dpnp_algo_linalg.pyx +++ b/dpnp/linalg/dpnp_algo_linalg.pyx @@ -47,7 +47,6 @@ cimport dpnp.dpnp_utils as utils __all__ = [ "dpnp_cholesky", "dpnp_cond", - "dpnp_det", "dpnp_eig", "dpnp_eigvals", "dpnp_inv", @@ -140,47 +139,6 @@ cpdef object dpnp_cond(object input, object p): return ret -cpdef utils.dpnp_descriptor dpnp_det(utils.dpnp_descriptor input): - cdef shape_type_c input_shape = 
input.shape - cdef size_t n = input.shape[-1] - cdef shape_type_c result_shape = (1,) - if input.ndim != 2: - result_shape = tuple((list(input.shape))[:-2]) - - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) - - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DET_EXT, param1_type, param1_type) - - input_obj = input.get_array() - - # create result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, - None, - device=input_obj.sycl_device, - usm_type=input_obj.usm_type, - sycl_queue=input_obj.sycl_queue) - - result_sycl_queue = result.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef custom_linalg_1in_1out_func_ptr_t func = kernel_data.ptr - - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - input.get_data(), - result.get_data(), - input_shape.data(), - input.ndim, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result - - cpdef tuple dpnp_eig(utils.dpnp_descriptor x1): cdef shape_type_c x1_shape = x1.shape From c4b9992da1abb4a95f71204b6b9cbf7d04096d05 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 30 Nov 2023 15:19:51 +0100 Subject: [PATCH 25/55] qwe --- perf.py | 32 ++++++++++++++++++++++++++++++++ qaz.py | 17 +++++++++++++++++ test.py | 27 +++++++++++++++++++++++++++ 3 files changed, 76 insertions(+) create mode 100644 perf.py create mode 100644 qaz.py create mode 100644 test.py diff --git a/perf.py b/perf.py new file mode 100644 index 000000000000..de88e2c24132 --- /dev/null +++ b/perf.py @@ -0,0 +1,32 @@ +import numpy +import dpnp + + +# na = numpy.random.randint(-10**4, 10**4, size=(4096,4096)) + +# a = numpy.array(na, dtype = "int32") + +# a_dp = dpnp.array(a,device='cpu') + +# def test(xp,dtype,device): +# na = numpy.random.randint(-10**4, 10**4, size=(4096,4096)) + +# a = numpy.array(na, dtype = dtype) +# a_dp = dpnp.array(a,device=device) + +# if xp == 0: +# numpy.linalg.slogdet(a) +# else: +# dpnp.linalg.slogdet(a_dp) + +def init(na, dtype): + # na = numpy.random.randint(-10**4, 10**4, size=(4096,4096)) + + a = numpy.array(na, dtype = dtype) + a_dp_cpu = dpnp.array(a,device='cpu') + a_dp_gpu = dpnp.array(a,device='gpu') + + numpy.linalg.slogdet(a) + dpnp.linalg.slogdet(a_dp_cpu) + + return a,a_dp_cpu,a_dp_gpu diff --git a/qaz.py b/qaz.py new file mode 100644 index 000000000000..755f6b25e447 --- /dev/null +++ b/qaz.py @@ -0,0 +1,17 @@ +import dpnp +import numpy + +a = numpy.array( + [ + [2, 3, 1, 4, 5], + [5, 6, 7, 8, 9], + [9, 7, 7, 2, 3], + [1, 4, 5, 1, 8], + [8, 9, 8, 5, 3], + ] +) + +a_dp = dpnp.array(a) + +print(numpy.linalg.slogdet(a)) +print(dpnp.linalg.slogdet(a_dp)) diff --git a/test.py b/test.py new file mode 100644 index 000000000000..fae55c253eb4 --- /dev/null +++ b/test.py @@ -0,0 +1,27 @@ +import dpnp +import numpy +import cProfile, pstats, io + + +# na = numpy.random.randint(-10**4, 10**4, size=(4096,4096)) +na = numpy.random.randint(-10**4, 10**4, size=(8192,8192)) +a = numpy.array(na, dtype = "int32") +a_dp = dpnp.array(a, device='cpu') + +a_stride = a[::2,::2] +a_dp_stride = a_dp[::2,::2] +dpnp.linalg.slogdet(a_dp_stride) + +pr = cProfile.Profile() +pr.enable() +# res = numpy.linalg.slogdet(a) +res = dpnp.linalg.slogdet(a_dp_stride) +pr.disable() + +s = io.StringIO() +sortBy = pstats.SortKey.CUMULATIVE + +ps = pstats.Stats(pr, 
stream=s).sort_stats(sortBy)
+ps.print_stats()
+
+print(s.getvalue())
From 78b98e73911e19a95e7ff4044724aa4487395d94 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev
Date: Fri, 15 Dec 2023 12:52:38 +0100
Subject: [PATCH 26/55] Keep lexicographical order

---
 dpnp/backend/extensions/lapack/lapack_py.cpp  | 28 +++----
 .../extensions/lapack/types_matrix.hpp        | 75 ++++++++++---------
 2 files changed, 52 insertions(+), 51 deletions(-)

diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp
index 71fcb0067aab..acf26989edd9 100644
--- a/dpnp/backend/extensions/lapack/lapack_py.cpp
+++ b/dpnp/backend/extensions/lapack/lapack_py.cpp
@@ -71,20 +71,6 @@ PYBIND11_MODULE(_lapack_impl, m)
           py::arg("sycl_queue"), py::arg("coeff_matrix"),
           py::arg("dependent_vals"), py::arg("depends") = py::list());
 
-    m.def("_heevd", &lapack_ext::heevd,
-          "Call `heevd` from OneMKL LAPACK library to return "
-          "the eigenvalues and eigenvectors of a complex Hermitian matrix",
-          py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"),
-          py::arg("eig_vecs"), py::arg("eig_vals"),
-          py::arg("depends") = py::list());
-
-    m.def("_syevd", &lapack_ext::syevd,
-          "Call `syevd` from OneMKL LAPACK library to return "
-          "the eigenvalues and eigenvectors of a real symmetric matrix",
-          py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"),
-          py::arg("eig_vecs"), py::arg("eig_vals"),
-          py::arg("depends") = py::list());
-
     m.def("_getrf", &lapack_ext::getrf,
           "Call `getrf` from OneMKL LAPACK library to return "
           "the LU factorization of a general n x n matrix",
@@ -99,4 +85,18 @@ PYBIND11_MODULE(_lapack_impl, m)
           py::arg("dev_info_array"), py::arg("n"), py::arg("stride_a"),
           py::arg("stride_ipiv"), py::arg("batch_size"),
           py::arg("depends") = py::list());
+
+    m.def("_heevd", &lapack_ext::heevd,
+          "Call `heevd` from OneMKL LAPACK library to return "
+          "the eigenvalues and eigenvectors of a complex Hermitian matrix",
+          py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"),
+          py::arg("eig_vecs"), py::arg("eig_vals"),
+          py::arg("depends") = py::list());
+
+    m.def("_syevd", &lapack_ext::syevd,
+          "Call `syevd` from OneMKL LAPACK library to return "
+          "the eigenvalues and eigenvectors of a real symmetric matrix",
+          py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"),
+          py::arg("eig_vecs"), py::arg("eig_vals"),
+          py::arg("depends") = py::list());
 }
diff --git a/dpnp/backend/extensions/lapack/types_matrix.hpp b/dpnp/backend/extensions/lapack/types_matrix.hpp
index ecce0a0c0bec..59198e975c8a 100644
--- a/dpnp/backend/extensions/lapack/types_matrix.hpp
+++ b/dpnp/backend/extensions/lapack/types_matrix.hpp
@@ -69,43 +69,6 @@ struct GesvTypePairSupportFactory
         // fall-through
         dpctl_td_ns::NotDefinedEntry>::is_defined;
 };
-/**
- * @brief A factory to define pairs of supported types for which
- * MKL LAPACK library provides support in oneapi::mkl::lapack::heevd
- * function.
- *
- * @tparam T Type of array containing input matrix A and an output array with
- * eigenvectors.
- * @tparam RealT Type of output array containing eigenvalues of A.
- */
-template <typename T, typename RealT>
-struct HeevdTypePairSupportFactory
-{
-    static constexpr bool is_defined = std::disjunction<
-        dpctl_td_ns::
-            TypePairDefinedEntry<T, std::complex<double>, RealT, double>,
-        dpctl_td_ns::TypePairDefinedEntry<T, std::complex<float>, RealT, float>,
-        // fall-through
-        dpctl_td_ns::NotDefinedEntry>::is_defined;
-};
-
-/**
- * @brief A factory to define pairs of supported types for which
- * MKL LAPACK library provides support in oneapi::mkl::lapack::syevd
- * function. 
- * - * @tparam T Type of array containing input matrix A and an output arrays with - * eigenvectors and eigenvectors. - */ -template -struct SyevdTypePairSupportFactory -{ - static constexpr bool is_defined = std::disjunction< - dpctl_td_ns::TypePairDefinedEntry, - dpctl_td_ns::TypePairDefinedEntry, - // fall-through - dpctl_td_ns::NotDefinedEntry>::is_defined; -}; /** * @brief A factory to define pairs of supported types for which @@ -158,6 +121,44 @@ struct GetrfBatchTypePairSupportFactory // fall-through dpctl_td_ns::NotDefinedEntry>::is_defined; }; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::heevd + * function. + * + * @tparam T Type of array containing input matrix A and an output array with + * eigenvectors. + * @tparam RealT Type of output array containing eigenvalues of A. + */ +template +struct HeevdTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns:: + TypePairDefinedEntry, RealT, double>, + dpctl_td_ns::TypePairDefinedEntry, RealT, float>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::syevd + * function. + * + * @tparam T Type of array containing input matrix A and an output arrays with + * eigenvectors and eigenvectors. + */ +template +struct SyevdTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; } // namespace types } // namespace lapack } // namespace ext From 46a996566fc2006d510b245c1472329abd9c3b20 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 15 Dec 2023 13:12:13 +0100 Subject: [PATCH 27/55] Add dpnp_slogdet to dpnp_utils_linalg --- dpnp/linalg/dpnp_iface_linalg.py | 94 ++------------------------------ dpnp/linalg/dpnp_utils_linalg.py | 67 ++++++++++++++++++++++- 2 files changed, 71 insertions(+), 90 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 3c91e7ff294e..384543e268ae 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -48,10 +48,10 @@ from dpnp.linalg.dpnp_algo_linalg import * from .dpnp_utils_linalg import ( - _lu_factor, check_stacked_2d, check_stacked_square, dpnp_eigh, + dpnp_slogdet, dpnp_solve, ) @@ -721,92 +721,8 @@ def slogdet(a): """ - if not dpnp.is_supported_array_type(a): - raise TypeError( - "An array must be any of supported type, but got {}".format(type(a)) - ) - - # TODO: use dpnp.linalg.LinAlgError - if a.ndim < 2: - raise ValueError( - f"{a.ndim}-dimensional array given. 
The input " - "array must be at least two-dimensional" - ) - - n, m = a.shape[-2:] - # TODO: use dpnp.linalg.LinAlgError - if m != n: - raise ValueError("Last 2 dimensions of the input array must be square") - - exec_q = a.sycl_queue - a_usm_type = a.usm_type - a_sycl_queue = a.sycl_queue - # dtype, sign_dtype = _util.linalg_common_type(a) - # TODO: Use linalg_common_type from #1598 - if dpnp.issubdtype(a.dtype, dpnp.floating): - res_type = ( - a.dtype if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 - ) - elif dpnp.issubdtype(a.dtype, dpnp.complexfloating): - res_type = ( - a.dtype if exec_q.sycl_device.has_aspect_fp64 else dpnp.complex64 - ) - else: - res_type = ( - dpnp.float64 if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 - ) - - res_type = dpnp.dtype(res_type) - logdet_dtype = dpnp.dtype(res_type.char.lower()) - - a_shape = a.shape - shape = a_shape[:-2] - n = a_shape[-2] - - if a.size == 0: - # empty batch (result is empty, too) or empty matrices det([[]]) == 1 - sign = dpnp.ones( - shape, dtype=res_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue - ) - logdet = dpnp.zeros( - shape, - dtype=logdet_dtype, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - return sign, logdet - - lu, ipiv, dev_info = _lu_factor(a, res_type) - - # return lu, ipiv, dev_info - - # Transposing 'lu' to swap the last two axes for compatibility - # with 'dpnp.diagonal' as it does not support 'axis1' and 'axis2' arguments. - # TODO: Replace with 'dpnp.diagonal(lu, axis1=-2, axis2=-1)' when supported. - lu_transposed = lu.transpose(-2, -1, *range(lu.ndim - 2)) - diag = dpnp.diagonal(lu_transposed) - - logdet = dpnp.log(dpnp.abs(diag)).sum(axis=-1) + dpnp.check_supported_arrays_type(a) + check_stacked_2d(a) + check_stacked_square(a) - # ipiv is 1-origin - non_zero = dpnp.count_nonzero( - ipiv - != dpnp.arange( - 1, n + 1, usm_type=ipiv.usm_type, sycl_queue=ipiv.sycl_queue - ), - axis=-1, - ) - if res_type.kind == "f": - non_zero += dpnp.count_nonzero(diag < 0, axis=-1) - - sign = (non_zero % 2) * -2 + 1 - if res_type.kind == "c": - sign = sign * dpnp.prod(diag / dpnp.abs(diag), axis=-1) - - sign = sign.astype(res_type) - logdet = logdet.astype(logdet_dtype, copy=False) - singular = dpnp.array([dev_info > 0]) - return ( - dpnp.where(singular, res_type.type(0), sign).reshape(shape), - dpnp.where(singular, logdet_dtype.type("-inf"), logdet).reshape(shape), - ) + return dpnp_slogdet(a) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 0ad62bdb9f06..83fd83a9f4b6 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -35,11 +35,11 @@ from dpnp.dpnp_utils import get_usm_allocations __all__ = [ - "_lu_factor", "check_stacked_2d", "check_stacked_square", "dpnp_eigh", "dpnp_solve", + "dpnp_slogdet", ] _jobz = {"N": 0, "V": 1} @@ -619,3 +619,68 @@ def dpnp_solve(a, b): a_ht_copy_ev.wait() return b_f + + +def dpnp_slogdet(a): + """ + dpnp_slogdet(a) + + Returns sign and logarithm of the determinant of `a` array. 
+ + """ + + a_usm_type = a.usm_type + a_sycl_queue = a.sycl_queue + + res_type = _common_type(a) + logdet_dtype = dpnp.dtype(res_type.char.lower()) + + a_shape = a.shape + shape = a_shape[:-2] + n = a_shape[-2] + + if a.size == 0: + # empty batch (result is empty, too) or empty matrices det([[]]) == 1 + sign = dpnp.ones( + shape, dtype=res_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + logdet = dpnp.zeros( + shape, + dtype=logdet_dtype, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + return sign, logdet + + lu, ipiv, dev_info = _lu_factor(a, res_type) + + # Transposing 'lu' to swap the last two axes for compatibility + # with 'dpnp.diagonal' as it does not support 'axis1' and 'axis2' arguments. + # TODO: Replace with 'dpnp.diagonal(lu, axis1=-2, axis2=-1)' when supported. + lu_transposed = lu.transpose(-2, -1, *range(lu.ndim - 2)) + diag = dpnp.diagonal(lu_transposed) + + logdet = dpnp.log(dpnp.abs(diag)).sum(axis=-1) + + # ipiv is 1-origin + non_zero = dpnp.count_nonzero( + ipiv + != dpnp.arange( + 1, n + 1, usm_type=ipiv.usm_type, sycl_queue=ipiv.sycl_queue + ), + axis=-1, + ) + if res_type.kind == "f": + non_zero += dpnp.count_nonzero(diag < 0, axis=-1) + + sign = (non_zero % 2) * -2 + 1 + if res_type.kind == "c": + sign = sign * dpnp.prod(diag / dpnp.abs(diag), axis=-1) + + sign = sign.astype(res_type) + logdet = logdet.astype(logdet_dtype, copy=False) + singular = dpnp.array([dev_info > 0]) + return ( + dpnp.where(singular, res_type.type(0), sign).reshape(shape), + dpnp.where(singular, logdet_dtype.type("-inf"), logdet).reshape(shape), + ) From da16383c55b32a80c2a18c9c327c2d92638577c8 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 15 Dec 2023 13:14:15 +0100 Subject: [PATCH 28/55] Move _lu_factor above --- dpnp/linalg/dpnp_utils_linalg.py | 256 +++++++++++++++---------------- 1 file changed, 128 insertions(+), 128 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 83fd83a9f4b6..ae29c30cb59d 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -175,134 +175,6 @@ def _common_inexact_type(default_dtype, *dtypes): return dpnp.result_type(*inexact_dtypes) -def dpnp_eigh(a, UPLO): - """ - dpnp_eigh(a, UPLO) - - Return the eigenvalues and eigenvectors of a complex Hermitian - (conjugate symmetric) or a real symmetric matrix. - - The main calculation is done by calling an extension function - for LAPACK library of OneMKL. Depending on input type of `a` array, - it will be either ``heevd`` (for complex types) or ``syevd`` (for others). 
- - """ - - a_usm_type = a.usm_type - a_sycl_queue = a.sycl_queue - a_order = "C" if a.flags.c_contiguous else "F" - a_usm_arr = dpnp.get_usm_ndarray(a) - - # 'V' means both eigenvectors and eigenvalues will be calculated - jobz = _jobz["V"] - uplo = _upper_lower[UPLO] - - # get resulting type of arrays with eigenvalues and eigenvectors - a_dtype = a.dtype - lapack_func = "_syevd" - if dpnp.issubdtype(a_dtype, dpnp.complexfloating): - lapack_func = "_heevd" - v_type = a_dtype - w_type = dpnp.float64 if a_dtype == dpnp.complex128 else dpnp.float32 - elif dpnp.issubdtype(a_dtype, dpnp.floating): - v_type = w_type = a_dtype - elif a_sycl_queue.sycl_device.has_aspect_fp64: - v_type = w_type = dpnp.float64 - else: - v_type = w_type = dpnp.float32 - - if a.ndim > 2: - w = dpnp.empty( - a.shape[:-1], - dtype=w_type, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - - # need to loop over the 1st dimension to get eigenvalues and eigenvectors of 3d matrix A - op_count = a.shape[0] - if op_count == 0: - return w, dpnp.empty_like(a, dtype=v_type) - - eig_vecs = [None] * op_count - ht_copy_ev = [None] * op_count - ht_lapack_ev = [None] * op_count - for i in range(op_count): - # oneMKL LAPACK assumes fortran-like array as input, so - # allocate a memory with 'F' order for dpnp array of eigenvectors - eig_vecs[i] = dpnp.empty_like(a[i], order="F", dtype=v_type) - - # use DPCTL tensor function to fill the array of eigenvectors with content of input array - ht_copy_ev[i], copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr[i], - dst=eig_vecs[i].get_array(), - sycl_queue=a_sycl_queue, - ) - - # call LAPACK extension function to get eigenvalues and eigenvectors of a portion of matrix A - ht_lapack_ev[i], _ = getattr(li, lapack_func)( - a_sycl_queue, - jobz, - uplo, - eig_vecs[i].get_array(), - w[i].get_array(), - depends=[copy_ev], - ) - - for i in range(op_count): - ht_lapack_ev[i].wait() - ht_copy_ev[i].wait() - - # combine the list of eigenvectors into a single array - v = dpnp.array(eig_vecs, order=a_order) - return w, v - else: - # oneMKL LAPACK assumes fortran-like array as input, so - # allocate a memory with 'F' order for dpnp array of eigenvectors - v = dpnp.empty_like(a, order="F", dtype=v_type) - - # use DPCTL tensor function to fill the array of eigenvectors with content of input array - ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr, dst=v.get_array(), sycl_queue=a_sycl_queue - ) - - # allocate a memory for dpnp array of eigenvalues - w = dpnp.empty( - a.shape[:-1], - dtype=w_type, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - - # call LAPACK extension function to get eigenvalues and eigenvectors of matrix A - ht_lapack_ev, lapack_ev = getattr(li, lapack_func)( - a_sycl_queue, - jobz, - uplo, - v.get_array(), - w.get_array(), - depends=[copy_ev], - ) - - if a_order != "F": - # need to align order of eigenvectors with one of input matrix A - out_v = dpnp.empty_like(v, order=a_order) - ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( - src=v.get_array(), - dst=out_v.get_array(), - sycl_queue=a_sycl_queue, - depends=[lapack_ev], - ) - ht_copy_out_ev.wait() - else: - out_v = v - - ht_lapack_ev.wait() - ht_copy_ev.wait() - - return w, out_v - - def _lu_factor(a, res_type): """Compute pivoted LU decomposition. 
@@ -489,6 +361,134 @@ def _lu_factor(a, res_type): return (a_h, ipiv_h, dev_info_h) +def dpnp_eigh(a, UPLO): + """ + dpnp_eigh(a, UPLO) + + Return the eigenvalues and eigenvectors of a complex Hermitian + (conjugate symmetric) or a real symmetric matrix. + + The main calculation is done by calling an extension function + for LAPACK library of OneMKL. Depending on input type of `a` array, + it will be either ``heevd`` (for complex types) or ``syevd`` (for others). + + """ + + a_usm_type = a.usm_type + a_sycl_queue = a.sycl_queue + a_order = "C" if a.flags.c_contiguous else "F" + a_usm_arr = dpnp.get_usm_ndarray(a) + + # 'V' means both eigenvectors and eigenvalues will be calculated + jobz = _jobz["V"] + uplo = _upper_lower[UPLO] + + # get resulting type of arrays with eigenvalues and eigenvectors + a_dtype = a.dtype + lapack_func = "_syevd" + if dpnp.issubdtype(a_dtype, dpnp.complexfloating): + lapack_func = "_heevd" + v_type = a_dtype + w_type = dpnp.float64 if a_dtype == dpnp.complex128 else dpnp.float32 + elif dpnp.issubdtype(a_dtype, dpnp.floating): + v_type = w_type = a_dtype + elif a_sycl_queue.sycl_device.has_aspect_fp64: + v_type = w_type = dpnp.float64 + else: + v_type = w_type = dpnp.float32 + + if a.ndim > 2: + w = dpnp.empty( + a.shape[:-1], + dtype=w_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + + # need to loop over the 1st dimension to get eigenvalues and eigenvectors of 3d matrix A + op_count = a.shape[0] + if op_count == 0: + return w, dpnp.empty_like(a, dtype=v_type) + + eig_vecs = [None] * op_count + ht_copy_ev = [None] * op_count + ht_lapack_ev = [None] * op_count + for i in range(op_count): + # oneMKL LAPACK assumes fortran-like array as input, so + # allocate a memory with 'F' order for dpnp array of eigenvectors + eig_vecs[i] = dpnp.empty_like(a[i], order="F", dtype=v_type) + + # use DPCTL tensor function to fill the array of eigenvectors with content of input array + ht_copy_ev[i], copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr[i], + dst=eig_vecs[i].get_array(), + sycl_queue=a_sycl_queue, + ) + + # call LAPACK extension function to get eigenvalues and eigenvectors of a portion of matrix A + ht_lapack_ev[i], _ = getattr(li, lapack_func)( + a_sycl_queue, + jobz, + uplo, + eig_vecs[i].get_array(), + w[i].get_array(), + depends=[copy_ev], + ) + + for i in range(op_count): + ht_lapack_ev[i].wait() + ht_copy_ev[i].wait() + + # combine the list of eigenvectors into a single array + v = dpnp.array(eig_vecs, order=a_order) + return w, v + else: + # oneMKL LAPACK assumes fortran-like array as input, so + # allocate a memory with 'F' order for dpnp array of eigenvectors + v = dpnp.empty_like(a, order="F", dtype=v_type) + + # use DPCTL tensor function to fill the array of eigenvectors with content of input array + ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=v.get_array(), sycl_queue=a_sycl_queue + ) + + # allocate a memory for dpnp array of eigenvalues + w = dpnp.empty( + a.shape[:-1], + dtype=w_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + + # call LAPACK extension function to get eigenvalues and eigenvectors of matrix A + ht_lapack_ev, lapack_ev = getattr(li, lapack_func)( + a_sycl_queue, + jobz, + uplo, + v.get_array(), + w.get_array(), + depends=[copy_ev], + ) + + if a_order != "F": + # need to align order of eigenvectors with one of input matrix A + out_v = dpnp.empty_like(v, order=a_order) + ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=v.get_array(), + 
dst=out_v.get_array(), + sycl_queue=a_sycl_queue, + depends=[lapack_ev], + ) + ht_copy_out_ev.wait() + else: + out_v = v + + ht_lapack_ev.wait() + ht_copy_ev.wait() + + return w, out_v + + def dpnp_solve(a, b): """ dpnp_solve(a, b) From 4e0e1831a9bb4adfe825abf0c83158821af7510b Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 15 Dec 2023 13:54:39 +0100 Subject: [PATCH 29/55] A minor update --- dpnp/backend/extensions/lapack/getrf.cpp | 21 ++++++++------- dpnp/backend/extensions/lapack/getrf.hpp | 2 +- .../backend/extensions/lapack/getrf_batch.cpp | 27 +++++++++---------- dpnp/backend/extensions/lapack/lapack_py.cpp | 4 +-- dpnp/linalg/dpnp_utils_linalg.py | 4 +-- tests/test_sycl_queue.py | 4 +-- 6 files changed, 31 insertions(+), 31 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index a83d94ac5069..3fe7927a9a01 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -72,7 +72,7 @@ static sycl::event getrf_impl(sycl::queue exec_q, T *a = reinterpret_cast(in_a); const std::int64_t scratchpad_size = - oneapi::mkl::lapack::getrf_scratchpad_size(exec_q, n, n, lda); + mkl_lapack::getrf_scratchpad_size(exec_q, n, n, lda); T *scratchpad = nullptr; std::stringstream error_msg; @@ -82,13 +82,16 @@ static sycl::event getrf_impl(sycl::queue exec_q, try { scratchpad = sycl::malloc_device(scratchpad_size, exec_q); - getrf_event = oneapi::mkl::lapack::getrf( + getrf_event = mkl_lapack::getrf( exec_q, - n, // Order of the square matrix; (0 ≤ n). - n, // Order of the square matrix; (0 ≤ n). - a, // Pointer to the n-by-n matrix. - lda, // The leading dimension of `a`. - ipiv, // Pointer to the array of pivot indices. + n, // The order of the square matrix A (0 ≤ n). + // It must be a non-negative integer. + n, // The number of columns in the square matrix A (0 ≤ n). + // It must be a non-negative integer. + a, // Pointer to the square matrix A (n x n). + lda, // The leading dimension of matrix A. + // It must be at least max(1, n). + ipiv, // Pointer to the output array of pivot indices. scratchpad, // Pointer to scratchpad memory to be used by MKL // routine for storing intermediate results. 
scratchpad_size, depends);
@@ -132,10 +135,10 @@ static sycl::event getrf_impl(sycl::queue exec_q,
 
 std::pair
     getrf(sycl::queue q,
-          const std::int64_t n,
           dpctl::tensor::usm_ndarray a_array,
           dpctl::tensor::usm_ndarray ipiv_array,
           dpctl::tensor::usm_ndarray dev_info_array,
+          const std::int64_t n,
           const std::vector &depends)
 {
 
@@ -153,8 +156,6 @@ std::pair
     char *a_array_data = a_array.get_data();
     const std::int64_t lda = std::max(1UL, n);
 
-    // check valid ipiv
-
     char *ipiv_array_data = ipiv_array.get_data();
     std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data);
 
diff --git a/dpnp/backend/extensions/lapack/getrf.hpp b/dpnp/backend/extensions/lapack/getrf.hpp
index e66edefc52cd..38db17bd9a03 100644
--- a/dpnp/backend/extensions/lapack/getrf.hpp
+++ b/dpnp/backend/extensions/lapack/getrf.hpp
@@ -40,10 +40,10 @@ namespace lapack
 {
 extern std::pair
     getrf(sycl::queue exec_q,
-          const std::int64_t n,
           dpctl::tensor::usm_ndarray a_array,
           dpctl::tensor::usm_ndarray ipiv_array,
           dpctl::tensor::usm_ndarray dev_info_array,
+          const std::int64_t n,
           const std::vector &depends = {});
 
 extern std::pair
diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp
index b552358965d1..b15fac74efee 100644
--- a/dpnp/backend/extensions/lapack/getrf_batch.cpp
+++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp
@@ -80,8 +80,8 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q,
     T *a = reinterpret_cast(in_a);
 
     const std::int64_t scratchpad_size =
-        oneapi::mkl::lapack::getrf_batch_scratchpad_size(
-            exec_q, n, n, lda, stride_a, stride_ipiv, batch_size);
+        mkl_lapack::getrf_batch_scratchpad_size(exec_q, n, n, lda, stride_a,
+                                                stride_ipiv, batch_size);
     T *scratchpad = nullptr;
 
     std::stringstream error_msg;
@@ -91,19 +91,20 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q,
     try {
         scratchpad = sycl::malloc_device(scratchpad_size, exec_q);
 
-        getrf_batch_event = oneapi::mkl::lapack::getrf_batch(
+        getrf_batch_event = mkl_lapack::getrf_batch(
             exec_q,
-            n, // Order of each square matrix in the batch; (0 ≤ n).
-            n, // Order of each square matrix in the batch; (0 ≤ n).
-            a, // Pointer to the batch of matrices.
-            lda, // The leading dimension of `a`.
-            stride_a, // Stride between matrices: Element spacing between
-                      // matrices in `a`.
-            ipiv, // Pivot indices: Pointer to the pivot indices for each
-                  // matrix.
+            n, // The order of each square matrix in the batch; (0 ≤ n).
+               // It must be a non-negative integer.
+            n, // The number of columns in each matrix in the batch; (0 ≤ n).
+               // It must be a non-negative integer.
+            a, // Pointer to the batch of square matrices, each of size (n x n).
+            lda, // The leading dimension of each matrix in the batch.
+            stride_a, // Stride between consecutive matrices in the batch.
+            ipiv, // Pointer to the array of pivot indices for each matrix in
+                  // the batch.
             stride_ipiv, // Stride between pivot indices: Spacing between pivot
                          // arrays in 'ipiv'.
-            batch_size, // Total number of matrices in the batch.
+            batch_size, // The total number of matrices in the batch.
             scratchpad, // Pointer to scratchpad memory to be used by MKL
                         // routine for storing intermediate results. 
scratchpad_size, depends);
@@ -172,8 +173,6 @@ std::pair
     char *a_array_data = a_array.get_data();
     const std::int64_t lda = std::max(1UL, n);
 
-    // check valid ipiv
-
     char *ipiv_array_data = ipiv_array.get_data();
     std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data);
 
diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp
index acf26989edd9..2b5ad61678f6 100644
--- a/dpnp/backend/extensions/lapack/lapack_py.cpp
+++ b/dpnp/backend/extensions/lapack/lapack_py.cpp
@@ -74,8 +74,8 @@ PYBIND11_MODULE(_lapack_impl, m)
     m.def("_getrf", &lapack_ext::getrf,
           "Call `getrf` from OneMKL LAPACK library to return "
           "the LU factorization of a general n x n matrix",
-          py::arg("sycl_queue"), py::arg("n"), py::arg("a_array"),
-          py::arg("ipiv_array"), py::arg("dev_info_array"),
+          py::arg("sycl_queue"), py::arg("a_array"), py::arg("ipiv_array"),
+          py::arg("dev_info_array"), py::arg("n"),
           py::arg("depends") = py::list());
 
     m.def("_getrf_batch", &lapack_ext::getrf_batch,
diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py
index ae29c30cb59d..41629f5c4430 100644
--- a/dpnp/linalg/dpnp_utils_linalg.py
+++ b/dpnp/linalg/dpnp_utils_linalg.py
@@ -305,10 +305,10 @@ def _lu_factor(a, res_type):
         # to perform LU decomposition on each batch in 'a_vecs[i]'
         ht_lapack_ev[i], _ = li._getrf(
             a_sycl_queue,
-            n,
             a_vecs[i].get_array(),
             ipiv_vecs[i].get_array(),
             dev_info_vecs[i].get_array(),
+            n,
             [a_copy_ev],
         )
 
@@ -345,10 +345,10 @@ def _lu_factor(a, res_type):
     # to perform LU decomposition on the input matrix
     ht_lapack_ev, _ = li._getrf(
         a_sycl_queue,
-        n,
         a_h.get_array(),
         ipiv_h.get_array(),
         dev_info_h.get_array(),
+        n,
         [a_copy_ev],
     )
 
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index 90ccd2a4525d..aca7030c85cb 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -1393,11 +1393,11 @@ def test_solve(device):
 )
 def test_slogdet(shape, is_empty, device):
     if is_empty:
-        numpy_x = numpy.empty(shape, dtype=dpnp.default_float_type())
+        numpy_x = numpy.empty(shape, dtype=dpnp.default_float_type(device))
     else:
         count_elem = numpy.prod(shape)
         numpy_x = numpy.arange(
-            1, count_elem + 1, dtype=dpnp.default_float_type()
+            1, count_elem + 1, dtype=dpnp.default_float_type(device)
         ).reshape(shape)
 
     dpnp_x = dpnp.array(numpy_x, device=device)

From 80d8188215f6f388f8591d861715a253b56dbfbd Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev
Date: Fri, 15 Dec 2023 19:44:02 +0100
Subject: [PATCH 30/55] Minor changes for _lu_factor

---
 dpnp/linalg/dpnp_utils_linalg.py | 14 +++----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py
index 41629f5c4430..8323117a0b70 100644
--- a/dpnp/linalg/dpnp_utils_linalg.py
+++ b/dpnp/linalg/dpnp_utils_linalg.py
@@ -194,6 +194,8 @@ def _lu_factor(a, res_type):
         piv (dpnp.ndarray):
             1-origin pivot indices with dimension
             ``(..., N)``.
+        dev_info (dpnp.ndarray):
+            ``getrf`` or `getrf_batch` info with dimension ``(...)``.
 
     See Also
     --------
@@ -201,17 +203,7 @@ def _lu_factor(a, res_type):
 
     """
 
-    # TODO: use dpnp.linalg.LinAlgError
-    if a.ndim < 2:
-        raise ValueError(
-            f"{a.ndim}-dimensional array given. 
The input " - "array must be at least two-dimensional" - ) - - n, m = a.shape[-2:] - # TODO: use dpnp.linalg.LinAlgError - if m != n: - raise ValueError("Last 2 dimensions of the input array must be square") + n = a.shape[-2] a_sycl_queue = a.sycl_queue a_usm_type = a.usm_type From 628dd90795409168044b24185757d89ae45af620 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 15 Dec 2023 19:59:33 +0100 Subject: [PATCH 31/55] Remove trash files --- perf.py | 32 -------------------------------- qaz.py | 17 ----------------- test.py | 27 --------------------------- 3 files changed, 76 deletions(-) delete mode 100644 perf.py delete mode 100644 qaz.py delete mode 100644 test.py diff --git a/perf.py b/perf.py deleted file mode 100644 index de88e2c24132..000000000000 --- a/perf.py +++ /dev/null @@ -1,32 +0,0 @@ -import numpy -import dpnp - - -# na = numpy.random.randint(-10**4, 10**4, size=(4096,4096)) - -# a = numpy.array(na, dtype = "int32") - -# a_dp = dpnp.array(a,device='cpu') - -# def test(xp,dtype,device): -# na = numpy.random.randint(-10**4, 10**4, size=(4096,4096)) - -# a = numpy.array(na, dtype = dtype) -# a_dp = dpnp.array(a,device=device) - -# if xp == 0: -# numpy.linalg.slogdet(a) -# else: -# dpnp.linalg.slogdet(a_dp) - -def init(na, dtype): - # na = numpy.random.randint(-10**4, 10**4, size=(4096,4096)) - - a = numpy.array(na, dtype = dtype) - a_dp_cpu = dpnp.array(a,device='cpu') - a_dp_gpu = dpnp.array(a,device='gpu') - - numpy.linalg.slogdet(a) - dpnp.linalg.slogdet(a_dp_cpu) - - return a,a_dp_cpu,a_dp_gpu diff --git a/qaz.py b/qaz.py deleted file mode 100644 index 755f6b25e447..000000000000 --- a/qaz.py +++ /dev/null @@ -1,17 +0,0 @@ -import dpnp -import numpy - -a = numpy.array( - [ - [2, 3, 1, 4, 5], - [5, 6, 7, 8, 9], - [9, 7, 7, 2, 3], - [1, 4, 5, 1, 8], - [8, 9, 8, 5, 3], - ] -) - -a_dp = dpnp.array(a) - -print(numpy.linalg.slogdet(a)) -print(dpnp.linalg.slogdet(a_dp)) diff --git a/test.py b/test.py deleted file mode 100644 index fae55c253eb4..000000000000 --- a/test.py +++ /dev/null @@ -1,27 +0,0 @@ -import dpnp -import numpy -import cProfile, pstats, io - - -# na = numpy.random.randint(-10**4, 10**4, size=(4096,4096)) -na = numpy.random.randint(-10**4, 10**4, size=(8192,8192)) -a = numpy.array(na, dtype = "int32") -a_dp = dpnp.array(a, device='cpu') - -a_stride = a[::2,::2] -a_dp_stride = a_dp[::2,::2] -dpnp.linalg.slogdet(a_dp_stride) - -pr = cProfile.Profile() -pr.enable() -# res = numpy.linalg.slogdet(a) -res = dpnp.linalg.slogdet(a_dp_stride) -pr.disable() - -s = io.StringIO() -sortBy = pstats.SortKey.CUMULATIVE - -ps = pstats.Stats(pr, stream=s).sort_stats(sortBy) -ps.print_stats() - -print(s.getvalue()) From a8db460f9af4c15cf357af0232795abc24e2001e Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 18 Dec 2023 15:20:07 +0100 Subject: [PATCH 32/55] Use getrf_batch only on CPU --- dpnp/linalg/dpnp_utils_linalg.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 8323117a0b70..54053e625a42 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -209,7 +209,10 @@ def _lu_factor(a, res_type): a_usm_type = a.usm_type # TODO: Find out at which array sizes the best performance is obtained - use_batch = True + # getrf_batch implementation shows slow results with large arrays on GPU. + # Use getrf_batch only on CPU. 
+ # On GPU call getrf for each two-dimensional array by loop + use_batch = a.sycl_device.has_aspect_cpu if a.ndim > 2: orig_shape = a.shape From e3cd5c4c02712b1fe81b15d916f1446c0d5e92c2 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 18 Dec 2023 15:33:53 +0100 Subject: [PATCH 33/55] Update tests for dpnp.linalg.slogdet --- tests/test_linalg.py | 2 +- .../cupy/linalg_tests/test_norms.py | 56 ++++++++++--------- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 6847bb63e290..992f46fafc07 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -676,7 +676,7 @@ def test_slogdet_strides(self): assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) # TODO: remove skipif when MKLD-16626 is resolved - @pytest.mark.skipif(is_cpu_device(), reason="MKL bug MKLD-16626") + @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") @pytest.mark.parametrize( "matrix", [ diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index ea6f3ec3941e..05dbd4da2e0d 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -49,36 +49,40 @@ def test_det_empty_matrices(self, xp, dtype): def test_det_different_last_two_dims(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2, 3, 2), xp, dtype) - # TODO: replace ValueError with dpnp.linalg.LinAlgError - with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + with pytest.raises( + (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) + ): xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) def test_det_different_last_two_dims_empty_batch(self, dtype): for xp in (numpy, cupy): a = xp.empty((0, 3, 2), dtype=dtype) - # TODO: replace ValueError with dpnp.linalg.LinAlgError - with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + with pytest.raises( + (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) + ): xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) def test_det_one_dim(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2,), xp, dtype) - # TODO: replace ValueError with dpnp.linalg.LinAlgError - with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + with pytest.raises( + (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) + ): xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) def test_det_zero_dim(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((), xp, dtype) - # TODO: replace ValueError with dpnp.linalg.LinAlgError - with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + with pytest.raises( + (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) + ): xp.linalg.det(a) # TODO: remove skipif when MKLD-16626 is resolved - @pytest.mark.skipif(is_cpu_device(), reason="MKL bug MKLD-16626") + @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_singular(self, xp, dtype): @@ -109,7 +113,7 @@ def test_slogdet_4(self, xp, dtype): return sign, logdet # TODO: remove skipif when MKLD-16626 is resolved - @pytest.mark.skipif(is_cpu_device(), reason="MKL bug MKLD-16626") + @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") @testing.for_dtypes("fd") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet_singular(self, xp, dtype): @@ -117,24 +121,24 @@ def test_slogdet_singular(self, xp, dtype): sign, logdet = xp.linalg.slogdet(a) 
return sign, logdet - # @testing.for_dtypes("fd") - # @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) - # def test_slogdet_singular_errstate(self, xp, dtype): - # a = xp.zeros((3, 3), dtype) - # with cupyx.errstate(linalg="raise"): - # # `cupy.linalg.slogdet` internally catches `dev_info < 0` from - # # cuSOLVER, which should not affect `dev_info > 0` cases. - # sign, logdet = xp.linalg.slogdet(a) - # return sign, logdet + # TODO: remove skipif when MKLD-16626 is resolved + @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") + @testing.for_dtypes("fd") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_slogdet_singular_errstate(self, xp, dtype): + a = xp.zeros((3, 3), dtype=dtype) + # TODO: dpnp has not errstate. Probably to be implemented later + # with cupyx.errstate(linalg="raise"): + # `cupy.linalg.slogdet` internally catches `dev_info < 0` from + # cuSOLVER, which should not affect `dev_info > 0` cases. + sign, logdet = xp.linalg.slogdet(a) + return sign, logdet @testing.for_dtypes("fd") def test_slogdet_one_dim(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2,), xp, dtype) - if xp is numpy: - with pytest.raises(numpy.linalg.LinAlgError): - xp.linalg.slogdet(a) - else: - # Replace with dpnp.linalg.LinAlgError - with pytest.raises(ValueError): - xp.linalg.slogdet(a) + with pytest.raises( + (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) + ): + xp.linalg.slogdet(a) From 99f3618c88b14b2f430f255c1edaf6b98cf54579 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 20 Dec 2023 21:15:24 +0100 Subject: [PATCH 34/55] Address remarks --- dpnp/dpnp_algo/dpnp_algo.pxd | 1 - dpnp/linalg/dpnp_iface_linalg.py | 34 ++++------- dpnp/linalg/dpnp_utils_linalg.py | 59 ++++++++++--------- .../cupy/linalg_tests/test_norms.py | 34 ++++------- 4 files changed, 55 insertions(+), 73 deletions(-) diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 63f61e7ce94f..cdeb78acb848 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -54,7 +54,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_CUMSUM_EXT DPNP_FN_DEGREES DPNP_FN_DEGREES_EXT - DPNP_FN_DET DPNP_FN_DIAG_INDICES DPNP_FN_DIAG_INDICES_EXT DPNP_FN_DIAGONAL diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 384543e268ae..45b159428716 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -158,7 +158,7 @@ def det(a): Parameters ---------- - a : (..., M, M) dpnp.ndarray + a : (..., M, M) {dpnp.ndarray, usm_ndarray} Input array to compute determinants for. Returns @@ -194,10 +194,7 @@ def det(a): """ - if not dpnp.is_supported_array_type(a): - raise TypeError( - "An array must be any of supported type, but got {}".format(type(a)) - ) + dpnp.check_supported_arrays_type(a) sign, logdet = slogdet(a) return sign * dpnp.exp(logdet) @@ -664,28 +661,23 @@ def svd(x1, full_matrices=True, compute_uv=True, hermitian=False): def slogdet(a): """ - Returns sign and logarithm of the determinant of an array. - - It calculates the natural logarithm of the determinant of a given value. + Compute the sign and (natural) logarithm of the determinant of an array. For full documentation refer to :obj:`numpy.linalg.slogdet`. - Args - ---- - a (dpnp.ndarray): The input matrix with dimension ``(..., N, N)``. + Parameters + ---------- + a : (..., M, M) {dpnp.ndarray, usm_ndarray} + Input array, has to be a square 2-D array. 
Returns ------- - out : tuple of :class:`~dpnp.ndarray`: - It returns a tuple ``(sign, logdet)``. - ``sign`` represents each sign of the determinant as a real number - ``0``, ``1`` or ``-1``. - ``logdet`` represents the natural logarithm of the absolute of the - determinant. - If the determinant is zero, ``sign`` will be ``0`` and ``logdet`` - will be ``-inf``. - The shapes of both ``sign`` and ``logdet`` are equal to - ``a.shape[:-2]``. + sign : (...) dpnp.ndarray + A number representing the sign of the determinant. For a real matrix, + this is 1, 0, or -1. For a complex matrix, this is a complex number + with absolute value 1 (i.e., it is on the unit circle), or else 0. + logabsdet : (...) dpnp.ndarray + The natural log of the absolute value of the determinant. Limitations ----------- diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 54053e625a42..527d7a63c5ce 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -38,8 +38,8 @@ "check_stacked_2d", "check_stacked_square", "dpnp_eigh", - "dpnp_solve", "dpnp_slogdet", + "dpnp_solve", ] _jobz = {"N": 0, "V": 1} @@ -176,30 +176,30 @@ def _common_inexact_type(default_dtype, *dtypes): def _lu_factor(a, res_type): - """Compute pivoted LU decomposition. + """ + Compute pivoted LU decomposition. Decompose a given batch of square matrices. Inputs and outputs are transposed. - Args: - a (dpnp.ndarray): The input matrix with dimension ``(..., N, N)``. - The dimension condition is not checked. - res_type (dpnp.dtype): float32, float64, complex64 or complex128. + Parameters + ---------- + a : (..., M, M) {dpnp.ndarray, usm_ndarray} + Input array containing the matrices to be decomposed. + res_type : dpnp.dtype + Specifies the data type of the result. + Acceptable data types are float32, float64, complex64, or complex128. - Returns: + Returns + ------- tuple: - lu_t (dpnp.ndarray): - ``L`` without its unit diagonal and ``U`` with - dimension ``(..., N, N)``. - piv (dpnp.ndarray): - 1-origin pivot indices with dimension - ``(..., N)``. - dev_info (dpnp.ndarray): - ``getrf`` or `getrf_batch` info with dimension ``(...)``. - - See Also - -------- - :obj:`scipy.linalg.lu_factor` + lu_t : (..., N, N) {dpnp.ndarray, usm_ndarray} + Combined 'L' and 'U' matrices from LU decomposition + excluding the diagonal of 'L'. + piv : (..., N) {dpnp.ndarray, usm_ndarray} + 1-origin pivot indices indicating row permutations during decomposition. + dev_info : (...) {dpnp.ndarray, usm_ndarray} + Information on `getrf` or `getrf_batch` computation success (0 for success). 
""" @@ -312,23 +312,17 @@ def _lu_factor(a, res_type): a_ht_copy_ev[i].wait() # Reshape the results back to their original shape - out_v = dpnp.array(a_vecs, order="C").reshape(orig_shape) + out_a = dpnp.array(a_vecs, order="C").reshape(orig_shape) out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) out_dev_info = dpnp.array(dev_info_vecs).reshape(orig_shape[:-2]) - return (out_v, out_ipiv, out_dev_info) + return (out_a, out_ipiv, out_dev_info) else: a_usm_arr = dpnp.get_usm_ndarray(a) # `a` must be copied because getrf destroys the input matrix a_h = dpnp.empty_like(a, order="C", dtype=res_type) - ipiv_h = dpnp.empty( - n, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue - ) - dev_info_h = dpnp.empty( - 1, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue - ) # use DPCTL tensor function to fill the сopy of the input array # from the input array @@ -336,6 +330,13 @@ def _lu_factor(a, res_type): src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue ) + ipiv_h = dpnp.empty( + n, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + dev_info_h = dpnp.empty( + 1, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + # Call the LAPACK extension function _getrf # to perform LU decomposition on the input matrix ht_lapack_ev, _ = li._getrf( @@ -665,11 +666,11 @@ def dpnp_slogdet(a): ), axis=-1, ) - if res_type.kind == "f": + if dpnp.issubdtype(res_type, dpnp.floating): non_zero += dpnp.count_nonzero(diag < 0, axis=-1) sign = (non_zero % 2) * -2 + 1 - if res_type.kind == "c": + if dpnp.issubdtype(res_type, dpnp.complexfloating): sign = sign * dpnp.prod(diag / dpnp.abs(diag), axis=-1) sign = sign.astype(res_type) diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index 05dbd4da2e0d..7198be85d9af 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -49,36 +49,28 @@ def test_det_empty_matrices(self, xp, dtype): def test_det_different_last_two_dims(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2, 3, 2), xp, dtype) - with pytest.raises( - (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) - ): + with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) def test_det_different_last_two_dims_empty_batch(self, dtype): for xp in (numpy, cupy): a = xp.empty((0, 3, 2), dtype=dtype) - with pytest.raises( - (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) - ): + with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) def test_det_one_dim(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2,), xp, dtype) - with pytest.raises( - (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) - ): + with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) def test_det_zero_dim(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((), xp, dtype) - with pytest.raises( - (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) - ): + with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) # TODO: remove skipif when MKLD-16626 is resolved @@ -91,21 +83,21 @@ def test_det_singular(self, xp, dtype): class TestSlogdet(unittest.TestCase): - @testing.for_dtypes("fd") + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet(self, xp, dtype): a = testing.shaped_arange((2, 2), xp, dtype) + 1 sign, logdet = 
xp.linalg.slogdet(a) return sign, logdet - @testing.for_dtypes("fd") + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet_3(self, xp, dtype): a = testing.shaped_arange((2, 2, 2), xp, dtype) + 1 sign, logdet = xp.linalg.slogdet(a) return sign, logdet - @testing.for_dtypes("fd") + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet_4(self, xp, dtype): a = testing.shaped_arange((2, 2, 2, 2), xp, dtype) + 1 @@ -114,7 +106,7 @@ def test_slogdet_4(self, xp, dtype): # TODO: remove skipif when MKLD-16626 is resolved @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") - @testing.for_dtypes("fd") + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet_singular(self, xp, dtype): a = xp.zeros((3, 3), dtype=dtype) @@ -123,22 +115,20 @@ def test_slogdet_singular(self, xp, dtype): # TODO: remove skipif when MKLD-16626 is resolved @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") - @testing.for_dtypes("fd") + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet_singular_errstate(self, xp, dtype): a = xp.zeros((3, 3), dtype=dtype) - # TODO: dpnp has not errstate. Probably to be implemented later + # TODO: dpnp has no errstate. Probably to be implemented later # with cupyx.errstate(linalg="raise"): # `cupy.linalg.slogdet` internally catches `dev_info < 0` from # cuSOLVER, which should not affect `dev_info > 0` cases. sign, logdet = xp.linalg.slogdet(a) return sign, logdet - @testing.for_dtypes("fd") + @testing.for_dtypes("fdFD") def test_slogdet_one_dim(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2,), xp, dtype) - with pytest.raises( - (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) - ): + with pytest.raises(xp.linalg.LinAlgError): xp.linalg.slogdet(a) From 1f9b6fa3afcbbcb956130575728497b53749484b Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 20 Dec 2023 22:31:30 +0100 Subject: [PATCH 35/55] Add _real_type func --- dpnp/linalg/dpnp_utils_linalg.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 527d7a63c5ce..56ef22c0e83b 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -45,6 +45,36 @@ _jobz = {"N": 0, "V": 1} _upper_lower = {"U": 0, "L": 1} +_real_types_map = { + "float32": "float32", # single : single + "float64": "float64", # double : double + "complex64": "float32", # csingle : csingle + "complex128": "float64", # cdouble : cdouble +} + + +def _real_type(dtype, device=None): + """ + Returns the real data type corresponding to a given dpnp data type. + + Parameters + ---------- + dtype : dpnp.dtype + The dtype for which to find the corresponding real data type. + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where an array of default floating type might be created. + + Returns + ------- + out : str + The name of the real data type. 
+ + """ + + default = dpnp.default_float_type(device) + real_type = _real_types_map.get(dtype.name, default) + return dpnp.dtype(real_type) + def check_stacked_2d(*arrays): """ @@ -629,7 +659,7 @@ def dpnp_slogdet(a): a_sycl_queue = a.sycl_queue res_type = _common_type(a) - logdet_dtype = dpnp.dtype(res_type.char.lower()) + logdet_dtype = _real_type(res_type) a_shape = a.shape shape = a_shape[:-2] From 7e12063e42e427b76fb21ec8afd7fe7b3b3dd3a8 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 20 Dec 2023 22:35:02 +0100 Subject: [PATCH 36/55] Add test_det in test_usm_type --- tests/test_usm_type.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 8e5bc500a3c3..60e330b19529 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -569,3 +569,33 @@ def test_slogdet(shape, is_empty, usm_type): assert x.usm_type == sign.usm_type assert x.usm_type == logdet.usm_type + + +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize( + "shape, is_empty", + [ + ((2, 2), False), + ((3, 2, 2), False), + ((0, 0), True), + ((0, 2, 2), True), + ], + ids=[ + "(2, 2)", + "(3, 2, 2)", + "(0, 0)", + "(0, 2, 2)", + ], +) +def test_det(shape, is_empty, usm_type): + if is_empty: + x = dp.empty(shape, dtype=dp.default_float_type(), usm_type=usm_type) + else: + count_elem = numpy.prod(shape) + x = dp.arange( + 1, count_elem + 1, dtype=dp.default_float_type(), usm_type=usm_type + ).reshape(shape) + + det = dp.linalg.det(x) + + assert x.usm_type == det.usm_type From 0e822584512a6a955c37a1c3dbfc2cd8bd65ea39 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 21 Dec 2023 12:48:56 +0100 Subject: [PATCH 37/55] Add more checks in getrf and getf_batch functions --- dpnp/backend/extensions/lapack/getrf.cpp | 44 ++++++++++++++++-- .../backend/extensions/lapack/getrf_batch.cpp | 46 +++++++++++++++++-- 2 files changed, 81 insertions(+), 9 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index 3fe7927a9a01..c09687f641e9 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -134,13 +134,41 @@ static sycl::event getrf_impl(sycl::queue exec_q, } std::pair - getrf(sycl::queue q, + getrf(sycl::queue exec_q, dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, dpctl::tensor::usm_ndarray dev_info_array, const std::int64_t n, const std::vector &depends) { + const int a_array_nd = a_array.get_ndim(); + const int ipiv_array_nd = ipiv_array.get_ndim(); + + if (a_array_nd != 2) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but a 2-dimensional array is expected."); + } + + if (ipiv_array_nd != 1) { + throw py::value_error("The array of pivot indices has ndim=" + + std::to_string(ipiv_array_nd) + + ", but a 1-dimensional array is expected."); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible( + exec_q, {a_array, ipiv_array, dev_info_array})) + { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + bool is_a_array_c_contig = a_array.is_c_contiguous(); + if (!is_a_array_c_contig) { + throw py::value_error("The input array " + "must be C-contiguous"); + } auto array_types = dpctl_td_ns::usm_ndarray_types(); int a_array_type_id = @@ -153,6 +181,14 @@ std::pair "of the input matrix."); } + auto 
ipiv_types = dpctl_td_ns::usm_ndarray_types(); + int ipiv_array_type_id = + ipiv_types.typenum_to_lookup_id(ipiv_array.get_typenum()); + + if (ipiv_array_type_id != static_cast(dpctl_td_ns::typenum_t::INT64)) { + throw py::value_error("The type of 'ipiv_array' must be int64."); + } + char *a_array_data = a_array.get_data(); const std::int64_t lda = std::max(1UL, n); @@ -164,11 +200,11 @@ std::pair reinterpret_cast(dev_info_array_data); std::vector host_task_events; - sycl::event getrf_ev = getrf_fn(q, n, a_array_data, lda, d_ipiv, d_dev_info, - host_task_events, depends); + sycl::event getrf_ev = getrf_fn(exec_q, n, a_array_data, lda, d_ipiv, + d_dev_info, host_task_events, depends); sycl::event args_ev = dpctl::utils::keep_args_alive( - q, {a_array, ipiv_array, dev_info_array}, host_task_events); + exec_q, {a_array, ipiv_array, dev_info_array}, host_task_events); return std::make_pair(args_ev, getrf_ev); } diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index b15fac74efee..86cb4c9aee93 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -147,7 +147,7 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, } std::pair - getrf_batch(sycl::queue q, + getrf_batch(sycl::queue exec_q, dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, dpctl::tensor::usm_ndarray dev_info_array, @@ -157,6 +157,34 @@ std::pair std::int64_t batch_size, const std::vector &depends) { + const int a_array_nd = a_array.get_ndim(); + const int ipiv_array_nd = ipiv_array.get_ndim(); + + if (a_array_nd < 3) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but an array with ndim >= 3 is expected."); + } + + if (ipiv_array_nd != 2) { + throw py::value_error("The array of pivot indices has ndim=" + + std::to_string(ipiv_array_nd) + + ", but a 2-dimensional array is expected."); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible( + exec_q, {a_array, ipiv_array, dev_info_array})) + { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + bool is_a_array_c_contig = a_array.is_c_contiguous(); + if (!is_a_array_c_contig) { + throw py::value_error("The input array " + "must be C-contiguous"); + } auto array_types = dpctl_td_ns::usm_ndarray_types(); int a_array_type_id = @@ -170,6 +198,14 @@ std::pair "of the input matrix."); } + auto ipiv_types = dpctl_td_ns::usm_ndarray_types(); + int ipiv_array_type_id = + ipiv_types.typenum_to_lookup_id(ipiv_array.get_typenum()); + + if (ipiv_array_type_id != static_cast(dpctl_td_ns::typenum_t::INT64)) { + throw py::value_error("The type of 'ipiv_array' must be int64."); + } + char *a_array_data = a_array.get_data(); const std::int64_t lda = std::max(1UL, n); @@ -181,12 +217,12 @@ std::pair reinterpret_cast(dev_info_array_data); std::vector host_task_events; - sycl::event getrf_batch_ev = - getrf_batch_fn(q, n, a_array_data, lda, stride_a, d_ipiv, stride_ipiv, - batch_size, d_dev_info, host_task_events, depends); + sycl::event getrf_batch_ev = getrf_batch_fn( + exec_q, n, a_array_data, lda, stride_a, d_ipiv, stride_ipiv, batch_size, + d_dev_info, host_task_events, depends); sycl::event args_ev = dpctl::utils::keep_args_alive( - q, {a_array, ipiv_array, dev_info_array}, host_task_events); + exec_q, {a_array, ipiv_array, dev_info_array}, host_task_events); return std::make_pair(args_ev, getrf_batch_ev); 
} From 3a6e5ce4622b6c1437b20341aec960434cba85c3 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 21 Dec 2023 14:11:55 +0100 Subject: [PATCH 38/55] Improve error handler in getrf_impl --- dpnp/backend/extensions/lapack/getrf.cpp | 48 +++++++++++++++++------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index c09687f641e9..b28f42621be6 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -77,6 +77,9 @@ static sycl::event getrf_impl(sycl::queue exec_q, std::stringstream error_msg; std::int64_t info = 0; + bool mkl_exception_caught = false; + bool mkl_singular_exception_caught = false; + bool sycl_exception_caught = false; sycl::event getrf_event; try { @@ -96,33 +99,52 @@ static sycl::event getrf_impl(sycl::queue exec_q, // routine for storing intermediate results. scratchpad_size, depends); } catch (mkl_lapack::exception const &e) { - error_msg - << "Unexpected MKL exception caught during getrf() call:\nreason: " - << e.what() << "\ninfo: " << e.info(); info = e.info(); + + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + mkl_exception_caught = true; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. Required size is at least " + << e.detail(); + mkl_exception_caught = true; + } + else if (info > 0) { + mkl_singular_exception_caught = true; + } + else { + error_msg << "Unexpected MKL exception caught during getrf() " + "call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + mkl_exception_caught = true; + } } catch (sycl::exception const &e) { error_msg << "Unexpected SYCL exception caught during getrf() call:\n" << e.what(); - info = -1; + sycl_exception_caught = true; } - if (info != 0) // an unexpected error occurs + if (mkl_exception_caught || + sycl_exception_caught) // an unexpected error occurs { if (scratchpad != nullptr) { sycl::free(scratchpad, exec_q); } - if (info < 0) { - throw std::runtime_error(error_msg.str()); - } + throw std::runtime_error(error_msg.str()); } - sycl::event write_info_event = exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on(getrf_event); - cgh.single_task([=]() { dev_info[0] = info; }); - }); + if (mkl_singular_exception_caught) { + sycl::event write_info_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(getrf_event); + cgh.single_task([=]() { dev_info[0] = info; }); + }); - host_task_events.push_back(write_info_event); + host_task_events.push_back(write_info_event); + } sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(getrf_event); From 6700fce216493ab4d2683919c5bf428ee7258057 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 21 Dec 2023 15:05:54 +0100 Subject: [PATCH 39/55] Improve error handler in getrf_batch_impl --- .../backend/extensions/lapack/getrf_batch.cpp | 53 +++++++++++++------ 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index 86cb4c9aee93..4bf69fdc73b7 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -86,6 +86,9 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, std::stringstream error_msg; std::int64_t info = 0; + bool mkl_exception_caught = false; + bool mkl_singular_exception_caught = false; + bool 
sycl_exception_caught = false; sycl::event getrf_batch_event; try { @@ -109,33 +112,53 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, // routine for storing intermediate results. scratchpad_size, depends); } catch (mkl_lapack::exception const &e) { - error_msg - << "Unexpected MKL exception caught during getrf() call:\nreason: " - << e.what() << "\ninfo: " << e.info(); info = e.info(); + + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + mkl_exception_caught = true; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. Required size is at least " + << e.detail(); + mkl_exception_caught = true; + } + else if (info > 0) { + mkl_singular_exception_caught = true; + } + else { + error_msg << "Unexpected MKL exception caught during getrf_batch() " + "call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + mkl_exception_caught = true; + } } catch (sycl::exception const &e) { - error_msg << "Unexpected SYCL exception caught during getrf() call:\n" - << e.what(); - info = -1; + error_msg + << "Unexpected SYCL exception caught during getrf_batch() call:\n" + << e.what(); + sycl_exception_caught = true; } - if (info != 0) // an unexpected error occurs + if (mkl_exception_caught || + sycl_exception_caught) // an unexpected error occurs { if (scratchpad != nullptr) { sycl::free(scratchpad, exec_q); } - if (info < 0) { - throw std::runtime_error(error_msg.str()); - } + throw std::runtime_error(error_msg.str()); } - sycl::event write_info_event = exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on(getrf_batch_event); - cgh.single_task([=]() { dev_info[0] = info; }); - }); + if (mkl_singular_exception_caught) { + sycl::event write_info_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(getrf_batch_event); + cgh.single_task([=]() { dev_info[0] = info; }); + }); - host_task_events.push_back(write_info_event); + host_task_events.push_back(write_info_event); + } sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(getrf_batch_event); From 644485b37f199d7935d0e7c8ebdc55f4ff0ade55 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 21 Dec 2023 15:09:11 +0100 Subject: [PATCH 40/55] dev_info is allocated as zeros --- dpnp/linalg/dpnp_utils_linalg.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 56ef22c0e83b..dcf5da31da6f 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -260,8 +260,8 @@ def _lu_factor(a, res_type): usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - dev_info_h = dpnp.empty( - 1, + dev_info_h = dpnp.zeros( + (batch_size,), dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue, @@ -319,7 +319,7 @@ def _lu_factor(a, res_type): usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - dev_info_vecs[i] = dpnp.empty( + dev_info_vecs[i] = dpnp.zeros( (1,), dtype=dpnp.int64, usm_type=a_usm_type, @@ -363,8 +363,8 @@ def _lu_factor(a, res_type): ipiv_h = dpnp.empty( n, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) - dev_info_h = dpnp.empty( - 1, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue + dev_info_h = dpnp.zeros( + (1,), dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) # Call the LAPACK extension function _getrf From 5015e15c64b9484353f6aaaac912b6bca1b1bf99 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 21 Dec 2023 
15:13:40 +0100 Subject: [PATCH 41/55] Remove skipif for singular tests --- tests/test_linalg.py | 2 -- tests/third_party/cupy/linalg_tests/test_norms.py | 6 ------ 2 files changed, 8 deletions(-) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 992f46fafc07..a061f7e65be1 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -675,8 +675,6 @@ def test_slogdet_strides(self): assert_allclose(sign_expected, sign_result) assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) - # TODO: remove skipif when MKLD-16626 is resolved - @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") @pytest.mark.parametrize( "matrix", [ diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index 7198be85d9af..bcf2c21d27f8 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -73,8 +73,6 @@ def test_det_zero_dim(self, dtype): with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) - # TODO: remove skipif when MKLD-16626 is resolved - @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_singular(self, xp, dtype): @@ -104,8 +102,6 @@ def test_slogdet_4(self, xp, dtype): sign, logdet = xp.linalg.slogdet(a) return sign, logdet - # TODO: remove skipif when MKLD-16626 is resolved - @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet_singular(self, xp, dtype): @@ -113,8 +109,6 @@ def test_slogdet_singular(self, xp, dtype): sign, logdet = xp.linalg.slogdet(a) return sign, logdet - # TODO: remove skipif when MKLD-16626 is resolved - @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet_singular_errstate(self, xp, dtype): From 68c436e7d0bb4853274678f1117f8778b7b78d1c Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 22 Dec 2023 00:18:14 +0100 Subject: [PATCH 42/55] Implement _lu_factor logic with dev_info as a python list --- dpnp/backend/extensions/lapack/getrf.cpp | 35 ++++++---------- dpnp/backend/extensions/lapack/getrf.hpp | 4 +- .../backend/extensions/lapack/getrf_batch.cpp | 35 ++++++---------- dpnp/linalg/dpnp_utils_linalg.py | 40 +++++++++---------- 4 files changed, 45 insertions(+), 69 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index b28f42621be6..dcef32bda031 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -51,7 +51,7 @@ typedef sycl::event (*getrf_impl_fn_ptr_t)(sycl::queue, char *, std::int64_t, std::int64_t *, - std::int64_t *, + py::list, std::vector &, const std::vector &); @@ -63,7 +63,7 @@ static sycl::event getrf_impl(sycl::queue exec_q, char *in_a, std::int64_t lda, std::int64_t *ipiv, - std::int64_t *dev_info, + py::list dev_info, std::vector &host_task_events, const std::vector &depends) { @@ -78,7 +78,6 @@ static sycl::event getrf_impl(sycl::queue exec_q, std::stringstream error_msg; std::int64_t info = 0; bool mkl_exception_caught = false; - bool mkl_singular_exception_caught = false; bool sycl_exception_caught = false; sycl::event getrf_event; @@ -113,7 +112,12 @@ static sycl::event getrf_impl(sycl::queue exec_q, mkl_exception_caught = true; } else if (info > 0) { - 
mkl_singular_exception_caught = true; + // Store the positive 'info' value in the first element of + // 'dev_info'. This indicates that the factorization has been + // completed, but the factor U (upper triangular matrix) is exactly + // singular. The 'info' value here is the index of the first zero + // element in the diagonal of U. + dev_info[0] = info; } else { error_msg << "Unexpected MKL exception caught during getrf() " @@ -137,15 +141,6 @@ static sycl::event getrf_impl(sycl::queue exec_q, throw std::runtime_error(error_msg.str()); } - if (mkl_singular_exception_caught) { - sycl::event write_info_event = exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on(getrf_event); - cgh.single_task([=]() { dev_info[0] = info; }); - }); - - host_task_events.push_back(write_info_event); - } - sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(getrf_event); auto ctx = exec_q.get_context(); @@ -159,7 +154,7 @@ std::pair getrf(sycl::queue exec_q, dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, - dpctl::tensor::usm_ndarray dev_info_array, + py::list dev_info, const std::int64_t n, const std::vector &depends) { @@ -179,9 +174,7 @@ std::pair } // check compatibility of execution queue and allocation queue - if (!dpctl::utils::queues_are_compatible( - exec_q, {a_array, ipiv_array, dev_info_array})) - { + if (!dpctl::utils::queues_are_compatible(exec_q, {a_array, ipiv_array})) { throw py::value_error( "Execution queue is not compatible with allocation queues"); } @@ -217,16 +210,12 @@ std::pair char *ipiv_array_data = ipiv_array.get_data(); std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data); - char *dev_info_array_data = dev_info_array.get_data(); - std::int64_t *d_dev_info = - reinterpret_cast(dev_info_array_data); - std::vector host_task_events; sycl::event getrf_ev = getrf_fn(exec_q, n, a_array_data, lda, d_ipiv, - d_dev_info, host_task_events, depends); + dev_info, host_task_events, depends); sycl::event args_ev = dpctl::utils::keep_args_alive( - exec_q, {a_array, ipiv_array, dev_info_array}, host_task_events); + exec_q, {a_array, ipiv_array}, host_task_events); return std::make_pair(args_ev, getrf_ev); } diff --git a/dpnp/backend/extensions/lapack/getrf.hpp b/dpnp/backend/extensions/lapack/getrf.hpp index 38db17bd9a03..6b9a4891bfd8 100644 --- a/dpnp/backend/extensions/lapack/getrf.hpp +++ b/dpnp/backend/extensions/lapack/getrf.hpp @@ -42,7 +42,7 @@ extern std::pair getrf(sycl::queue exec_q, dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, - dpctl::tensor::usm_ndarray dev_info_array, + py::list dev_info, const std::int64_t n, const std::vector &depends = {}); @@ -50,7 +50,7 @@ extern std::pair getrf_batch(sycl::queue exec_q, dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, - dpctl::tensor::usm_ndarray dev_info_array, + py::list dev_info, std::int64_t n, std::int64_t stride_a, std::int64_t stride_ipiv, diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index 4bf69fdc73b7..22ee25f90745 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -55,7 +55,7 @@ typedef sycl::event (*getrf_batch_impl_fn_ptr_t)( std::int64_t *, std::int64_t, std::int64_t, - std::int64_t *, + py::list, std::vector &, const std::vector &); @@ -71,7 +71,7 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - 
std::int64_t *dev_info, + py::list dev_info, std::vector &host_task_events, const std::vector &depends) { @@ -87,7 +87,6 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, std::stringstream error_msg; std::int64_t info = 0; bool mkl_exception_caught = false; - bool mkl_singular_exception_caught = false; bool sycl_exception_caught = false; sycl::event getrf_batch_event; @@ -126,7 +125,12 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, mkl_exception_caught = true; } else if (info > 0) { - mkl_singular_exception_caught = true; + // Store the positive 'info' value in the first element of + // 'dev_info'. This indicates that the factorization has been + // completed, but the factor U (upper triangular matrix) is exactly + // singular. The 'info' value here is the index of the first zero + // element in the diagonal of U. + dev_info[0] = info; } else { error_msg << "Unexpected MKL exception caught during getrf_batch() " @@ -151,15 +155,6 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, throw std::runtime_error(error_msg.str()); } - if (mkl_singular_exception_caught) { - sycl::event write_info_event = exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on(getrf_batch_event); - cgh.single_task([=]() { dev_info[0] = info; }); - }); - - host_task_events.push_back(write_info_event); - } - sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(getrf_batch_event); auto ctx = exec_q.get_context(); @@ -173,7 +168,7 @@ std::pair getrf_batch(sycl::queue exec_q, dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, - dpctl::tensor::usm_ndarray dev_info_array, + py::list dev_info, std::int64_t n, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -196,9 +191,7 @@ std::pair } // check compatibility of execution queue and allocation queue - if (!dpctl::utils::queues_are_compatible( - exec_q, {a_array, ipiv_array, dev_info_array})) - { + if (!dpctl::utils::queues_are_compatible(exec_q, {a_array, ipiv_array})) { throw py::value_error( "Execution queue is not compatible with allocation queues"); } @@ -235,17 +228,13 @@ std::pair char *ipiv_array_data = ipiv_array.get_data(); std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data); - char *dev_info_array_data = dev_info_array.get_data(); - std::int64_t *d_dev_info = - reinterpret_cast(dev_info_array_data); - std::vector host_task_events; sycl::event getrf_batch_ev = getrf_batch_fn( exec_q, n, a_array_data, lda, stride_a, d_ipiv, stride_ipiv, batch_size, - d_dev_info, host_task_events, depends); + dev_info, host_task_events, depends); sycl::event args_ev = dpctl::utils::keep_args_alive( - exec_q, {a_array, ipiv_array, dev_info_array}, host_task_events); + exec_q, {a_array, ipiv_array}, host_task_events); return std::make_pair(args_ev, getrf_batch_ev); } diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index dcf5da31da6f..655872a291fb 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -260,12 +260,7 @@ def _lu_factor(a, res_type): usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - dev_info_h = dpnp.zeros( - (batch_size,), - dtype=dpnp.int64, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) + dev_info_h = [0] * batch_size a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue @@ -280,7 +275,7 @@ def _lu_factor(a, res_type): a_sycl_queue, a_h.get_array(), ipiv_h.get_array(), - dev_info_h.get_array(), + dev_info_h, n, a_stride, ipiv_stride, @@ 
-291,7 +286,11 @@ def _lu_factor(a, res_type): ht_lapack_ev.wait() a_ht_copy_ev.wait() - return (a_h, ipiv_h, dev_info_h) + dev_info_array = dpnp.array( + dev_info_h, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + + return (a_h, ipiv_h, dev_info_array) else: # Initialize lists for storing arrays and events for each batch @@ -319,12 +318,7 @@ def _lu_factor(a, res_type): usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - dev_info_vecs[i] = dpnp.zeros( - (1,), - dtype=dpnp.int64, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) + dev_info_vecs[i] = [0] # Call the LAPACK extension function _getrf # to perform LU decomposition on each batch in 'a_vecs[i]' @@ -332,7 +326,7 @@ def _lu_factor(a, res_type): a_sycl_queue, a_vecs[i].get_array(), ipiv_vecs[i].get_array(), - dev_info_vecs[i].get_array(), + dev_info_vecs[i], n, [a_copy_ev], ) @@ -344,7 +338,9 @@ def _lu_factor(a, res_type): # Reshape the results back to their original shape out_a = dpnp.array(a_vecs, order="C").reshape(orig_shape) out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) - out_dev_info = dpnp.array(dev_info_vecs).reshape(orig_shape[:-2]) + out_dev_info = dpnp.array( + dev_info_vecs, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ).reshape(orig_shape[:-2]) return (out_a, out_ipiv, out_dev_info) @@ -363,9 +359,7 @@ def _lu_factor(a, res_type): ipiv_h = dpnp.empty( n, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) - dev_info_h = dpnp.zeros( - (1,), dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue - ) + dev_info_h = [0] # Call the LAPACK extension function _getrf # to perform LU decomposition on the input matrix @@ -373,7 +367,7 @@ def _lu_factor(a, res_type): a_sycl_queue, a_h.get_array(), ipiv_h.get_array(), - dev_info_h.get_array(), + dev_info_h, n, [a_copy_ev], ) @@ -381,10 +375,14 @@ def _lu_factor(a, res_type): ht_lapack_ev.wait() a_ht_copy_ev.wait() + dev_info_array = dpnp.array( + dev_info_h, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + # Return a tuple containing the factorized matrix 'a_h', # pivot indices 'ipiv_h' # and the status 'dev_info_h' from the LAPACK getrf call - return (a_h, ipiv_h, dev_info_h) + return (a_h, ipiv_h, dev_info_array) def dpnp_eigh(a, UPLO): From 030a083acef3cb5f81d7b0b30007230a8937192e Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 22 Dec 2023 00:51:01 +0100 Subject: [PATCH 43/55] Update getrf_rf error handler with mkl_lapack::batch_error --- .../backend/extensions/lapack/getrf_batch.cpp | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index 22ee25f90745..20a03a085074 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -110,33 +110,35 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, scratchpad, // Pointer to scratchpad memory to be used by MKL // routine for storing intermediate results. 
scratchpad_size, depends); + } catch (mkl_lapack::batch_error const &be) { + // Get the indices of matrices within the batch that encountered an + // error + auto error_matrices_ids = be.ids(); + // Get the indices of the first zero diagonal elements of these matrices + auto error_info = be.exceptions(); + + for (size_t i = 0; i < error_matrices_ids.size(); ++i) { + // Assign the index of the first zero diagonal element in each + // error matrix to the corresponding index in 'dev_info' + dev_info[error_matrices_ids[i]] = error_info[i]; + } } catch (mkl_lapack::exception const &e) { + mkl_exception_caught = true; info = e.info(); if (info < 0) { error_msg << "Parameter number " << -info << " had an illegal value."; - mkl_exception_caught = true; } else if (info == scratchpad_size && e.detail() != 0) { error_msg << "Insufficient scratchpad size. Required size is at least " << e.detail(); - mkl_exception_caught = true; - } - else if (info > 0) { - // Store the positive 'info' value in the first element of - // 'dev_info'. This indicates that the factorization has been - // completed, but the factor U (upper triangular matrix) is exactly - // singular. The 'info' value here is the index of the first zero - // element in the diagonal of U. - dev_info[0] = info; } else { error_msg << "Unexpected MKL exception caught during getrf_batch() " "call:\nreason: " << e.what() << "\ninfo: " << e.info(); - mkl_exception_caught = true; } } catch (sycl::exception const &e) { error_msg From 579b4e590322f0359f68257a095afff0553ab7b5 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 22 Dec 2023 01:08:51 +0100 Subject: [PATCH 44/55] Remove passing n parameter to _getrf --- dpnp/backend/extensions/lapack/getrf.cpp | 3 ++- dpnp/backend/extensions/lapack/getrf.hpp | 1 - dpnp/backend/extensions/lapack/lapack_py.cpp | 3 +-- dpnp/linalg/dpnp_utils_linalg.py | 2 -- 4 files changed, 3 insertions(+), 6 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index dcef32bda031..8d0e683df07f 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -155,7 +155,6 @@ std::pair dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, py::list dev_info, - const std::int64_t n, const std::vector &depends) { const int a_array_nd = a_array.get_ndim(); @@ -204,6 +203,8 @@ std::pair throw py::value_error("The type of 'ipiv_array' must be int64."); } + const std::int64_t n = a_array.get_shape_raw()[0]; + char *a_array_data = a_array.get_data(); const std::int64_t lda = std::max(1UL, n); diff --git a/dpnp/backend/extensions/lapack/getrf.hpp b/dpnp/backend/extensions/lapack/getrf.hpp index 6b9a4891bfd8..d8ea425f6804 100644 --- a/dpnp/backend/extensions/lapack/getrf.hpp +++ b/dpnp/backend/extensions/lapack/getrf.hpp @@ -43,7 +43,6 @@ extern std::pair dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, py::list dev_info, - const std::int64_t n, const std::vector &depends = {}); extern std::pair diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp index 2b5ad61678f6..3c6ba80575da 100644 --- a/dpnp/backend/extensions/lapack/lapack_py.cpp +++ b/dpnp/backend/extensions/lapack/lapack_py.cpp @@ -75,8 +75,7 @@ PYBIND11_MODULE(_lapack_impl, m) "Call `getrf` from OneMKL LAPACK library to return " "the LU factorization of a general n x n matrix", py::arg("sycl_queue"), py::arg("a_array"), py::arg("ipiv_array"), - py::arg("dev_info_array"), 
py::arg("n"), - py::arg("depends") = py::list()); + py::arg("dev_info"), py::arg("depends") = py::list()); m.def("_getrf_batch", &lapack_ext::getrf_batch, "Call `getrf_batch` from OneMKL LAPACK library to return " diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 655872a291fb..fdeabfa38ed4 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -327,7 +327,6 @@ def _lu_factor(a, res_type): a_vecs[i].get_array(), ipiv_vecs[i].get_array(), dev_info_vecs[i], - n, [a_copy_ev], ) @@ -368,7 +367,6 @@ def _lu_factor(a, res_type): a_h.get_array(), ipiv_h.get_array(), dev_info_h, - n, [a_copy_ev], ) From acd04b7ed85f8c365fb03fca1c30c1fb298d5ccc Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 22 Dec 2023 01:20:35 +0100 Subject: [PATCH 45/55] Add a new test_slogdet_singular_matrix_3D test --- tests/test_linalg.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index a061f7e65be1..7bf21bd5a75b 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -704,6 +704,22 @@ def test_slogdet_singular_matrix(self, matrix): assert_allclose(sign_expected, sign_result) assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + # TODO: remove skipif when MKLD-16626 is resolved + # _getrf_batch does not raise an error with singular matrices. + # Skip running on cpu because dpnp uses _getrf_batch only on cpu. + @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") + def test_slogdet_singular_matrix_3D(self): + a_np = numpy.array( + [[[1, 2], [3, 4]], [[1, 2], [1, 2]], [[1, 3], [3, 1]]] + ) + a_dp = inp.array(a_np) + + sign_expected, logdet_expected = numpy.linalg.slogdet(a_np) + sign_result, logdet_result = inp.linalg.slogdet(a_dp) + + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + def test_solve_errors(self): a_dp = inp.array([[1, 2], [3, 5]], dtype="float32") From f0cc4d3b808c2a4344d93873e84be83a3f3cb377 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 22 Dec 2023 20:04:08 +0100 Subject: [PATCH 46/55] Update tests for dpnp.linalg.det --- tests/test_linalg.py | 136 ++++++++++++++++++++++++++++++++----------- 1 file changed, 102 insertions(+), 34 deletions(-) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 7bf21bd5a75b..8c1085e64684 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -105,46 +105,114 @@ def test_cond(arr, p): assert_array_equal(expected, result) -@pytest.mark.skipif(is_cpu_device(), reason="MKL bug MKLD-16626") -@pytest.mark.parametrize( - "array", - [ - [[0, 0], [0, 0]], - [[1, 2], [1, 2]], - [[1, 2], [3, 4]], - [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], +class TestDet: + @pytest.mark.parametrize( + "array", [ - [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], - [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], + [[1, 2], [3, 4]], + [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], + [ + [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], + [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], + ], ], - ], - ids=[ - "[[0, 0], [0, 0]]", - "[[1, 2], [1, 2]]", - "[[1, 2], [3, 4]]", - "[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]", - "[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]", - ], -) -def test_det(array): - a = numpy.array(array) - ia = inp.array(a) - result = inp.linalg.det(ia) - expected = numpy.linalg.det(a) - assert_allclose(expected, result) + ids=[ + "2D_array", + "3D_array", + "4D_array", + ], + ) 
+ def test_det(self, array): + a = numpy.array(array) + ia = inp.array(a) + result = inp.linalg.det(ia) + expected = numpy.linalg.det(a) + assert_allclose(expected, result) + + def test_det_strides(self): + a_np = numpy.array( + [ + [2, 3, 1, 4, 5], + [5, 6, 7, 8, 9], + [9, 7, 7, 2, 3], + [1, 4, 5, 1, 8], + [8, 9, 8, 5, 3], + ] + ) + a_dp = inp.array(a_np) -def test_det_empty(): - a = numpy.empty((0, 0, 2, 2), dtype=numpy.float32) - ia = inp.array(a) + # positive strides + expected = numpy.linalg.det(a_np[::2, ::2]) + result = inp.linalg.det(a_dp[::2, ::2]) + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) - np_det = numpy.linalg.det(a) - dpnp_det = inp.linalg.det(ia) + # negative strides + expected = numpy.linalg.det(a_np[::-2, ::-2]) + result = inp.linalg.det(a_dp[::-2, ::-2]) + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) + + def test_det_empty(self): + a = numpy.empty((0, 0, 2, 2), dtype=numpy.float32) + ia = inp.array(a) - assert dpnp_det.dtype == np_det.dtype - assert dpnp_det.shape == np_det.shape + np_det = numpy.linalg.det(a) + dpnp_det = inp.linalg.det(ia) - assert_allclose(np_det, dpnp_det) + assert dpnp_det.dtype == np_det.dtype + assert dpnp_det.shape == np_det.shape + + assert_allclose(np_det, dpnp_det) + + @pytest.mark.parametrize( + "matrix", + [ + [[1, 2], [2, 4]], + [[0, 0], [0, 0]], + [[1, 1], [1, 1]], + [[2, 4], [1, 2]], + [[1, 2], [0, 0]], + [[1, 0], [2, 0]], + ], + ids=[ + "Linearly dependent rows", + "Zero matrix", + "Identical rows", + "Linearly dependent columns", + "Zero row", + "Zero column", + ], + ) + def test_det_singular_matrix(self, matrix): + a_np = numpy.array(matrix, dtype="float32") + a_dp = inp.array(a_np) + + expected = numpy.linalg.slogdet(a_np) + result = inp.linalg.slogdet(a_dp) + + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) + + # TODO: remove skipif when MKLD-16626 is resolved + # _getrf_batch does not raise an error with singular matrices. + # Skip running on cpu because dpnp uses _getrf_batch only on cpu. 
+ @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") + def test_det_singular_matrix_3D(self): + a_np = numpy.array( + [[[1, 2], [3, 4]], [[1, 2], [1, 2]], [[1, 3], [3, 1]]] + ) + a_dp = inp.array(a_np) + + expected = numpy.linalg.det(a_np) + result = inp.linalg.det(a_dp) + + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) + + def test_det_errors(self): + a_dp = inp.array([[1, 2], [3, 5]], dtype="float32") + + # unsupported type + a_np = inp.asnumpy(a_dp) + assert_raises(TypeError, inp.linalg.det, a_np) @pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @@ -720,7 +788,7 @@ def test_slogdet_singular_matrix_3D(self): assert_allclose(sign_expected, sign_result) assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) - def test_solve_errors(self): + def test_slogdet_errors(self): a_dp = inp.array([[1, 2], [3, 5]], dtype="float32") # unsupported type From c9b7c3bce857337ea8e02a667f5afbd25b6320fa Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 8 Jan 2024 12:41:53 +0100 Subject: [PATCH 47/55] Use is_exception_caught flag in getrf and getrf_batch error handler --- dpnp/backend/extensions/lapack/getrf.cpp | 13 +++++-------- dpnp/backend/extensions/lapack/getrf_batch.cpp | 10 ++++------ 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index 8d0e683df07f..13f759e4e4d8 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -77,8 +77,7 @@ static sycl::event getrf_impl(sycl::queue exec_q, std::stringstream error_msg; std::int64_t info = 0; - bool mkl_exception_caught = false; - bool sycl_exception_caught = false; + bool is_exception_caught = false; sycl::event getrf_event; try { @@ -98,18 +97,17 @@ static sycl::event getrf_impl(sycl::queue exec_q, // routine for storing intermediate results. scratchpad_size, depends); } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; info = e.info(); if (info < 0) { error_msg << "Parameter number " << -info << " had an illegal value."; - mkl_exception_caught = true; } else if (info == scratchpad_size && e.detail() != 0) { error_msg << "Insufficient scratchpad size. Required size is at least " << e.detail(); - mkl_exception_caught = true; } else if (info > 0) { // Store the positive 'info' value in the first element of @@ -117,22 +115,21 @@ static sycl::event getrf_impl(sycl::queue exec_q, // completed, but the factor U (upper triangular matrix) is exactly // singular. The 'info' value here is the index of the first zero // element in the diagonal of U. 
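+            // Note: a positive 'info' signals a completed factorization with a
+            // singular U, not a failure; the exception flag is therefore cleared
+            // below and the condition is reported through 'dev_info' instead of
+            // throwing a runtime_error.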
+ is_exception_caught = false; dev_info[0] = info; } else { error_msg << "Unexpected MKL exception caught during getrf() " "call:\nreason: " << e.what() << "\ninfo: " << e.info(); - mkl_exception_caught = true; } } catch (sycl::exception const &e) { + is_exception_caught = true; error_msg << "Unexpected SYCL exception caught during getrf() call:\n" << e.what(); - sycl_exception_caught = true; } - if (mkl_exception_caught || - sycl_exception_caught) // an unexpected error occurs + if (is_exception_caught) // an unexpected error occurs { if (scratchpad != nullptr) { sycl::free(scratchpad, exec_q); diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index 20a03a085074..4bfbca06a11a 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -86,8 +86,7 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, std::stringstream error_msg; std::int64_t info = 0; - bool mkl_exception_caught = false; - bool sycl_exception_caught = false; + bool is_exception_caught = false; sycl::event getrf_batch_event; try { @@ -123,7 +122,7 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, dev_info[error_matrices_ids[i]] = error_info[i]; } } catch (mkl_lapack::exception const &e) { - mkl_exception_caught = true; + is_exception_caught = true; info = e.info(); if (info < 0) { @@ -141,14 +140,13 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, << e.what() << "\ninfo: " << e.info(); } } catch (sycl::exception const &e) { + is_exception_caught = true; error_msg << "Unexpected SYCL exception caught during getrf_batch() call:\n" << e.what(); - sycl_exception_caught = true; } - if (mkl_exception_caught || - sycl_exception_caught) // an unexpected error occurs + if (is_exception_caught) // an unexpected error occurs { if (scratchpad != nullptr) { sycl::free(scratchpad, exec_q); From a3873cd78a25416468a13e2af6a6e926fbbe45bc Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 8 Jan 2024 12:53:14 +0100 Subject: [PATCH 48/55] Update gesv error handler --- dpnp/backend/extensions/lapack/gesv.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/dpnp/backend/extensions/lapack/gesv.cpp b/dpnp/backend/extensions/lapack/gesv.cpp index 72e5aa806714..781356a2f52a 100644 --- a/dpnp/backend/extensions/lapack/gesv.cpp +++ b/dpnp/backend/extensions/lapack/gesv.cpp @@ -84,7 +84,7 @@ static sycl::event gesv_impl(sycl::queue exec_q, std::stringstream error_msg; std::int64_t info = 0; - bool sycl_exception_caught = false; + bool is_exception_caught = false; sycl::event gesv_event; try { @@ -106,12 +106,18 @@ static sycl::event gesv_impl(sycl::queue exec_q, // routine for storing intermediate results. scratchpad_size, depends); } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; info = e.info(); if (info < 0) { error_msg << "Parameter number " << -info << " had an illegal value."; } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. Required size is at least " + << e.detail(); + } else if (info > 0) { T host_U; exec_q.memcpy(&host_U, &a[(info - 1) * lda + info - 1], sizeof(T)) @@ -131,23 +137,18 @@ static sycl::event gesv_impl(sycl::queue exec_q, << e.what() << "\ninfo: " << e.info(); } } - else if (info == scratchpad_size && e.detail() != 0) { - error_msg - << "Insufficient scratchpad size. 
Required size is at least " - << e.detail(); - } else { error_msg << "Unexpected MKL exception caught during gesv() " "call:\nreason: " << e.what() << "\ninfo: " << e.info(); } } catch (sycl::exception const &e) { + is_exception_caught = true; error_msg << "Unexpected SYCL exception caught during gesv() call:\n" << e.what(); - sycl_exception_caught = true; } - if (info != 0 || sycl_exception_caught) // an unexpected error occurs + if (is_exception_caught) // an unexpected error occurs { if (scratchpad != nullptr) { sycl::free(scratchpad, exec_q); From 9652797a23810c5db4d8609b4756622ff00dd83a Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 8 Jan 2024 15:29:46 +0100 Subject: [PATCH 49/55] Reshape results after calling getrf_batch --- dpnp/linalg/dpnp_utils_linalg.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index fdeabfa38ed4..878b4fc480bb 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -290,6 +290,11 @@ def _lu_factor(a, res_type): dev_info_h, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) + # Reshape the results back to their original shape + a_h = a_h.reshape(orig_shape) + ipiv_h = ipiv_h.reshape(orig_shape[:-1]) + dev_info_array = dev_info_array.reshape(orig_shape[:-2]) + return (a_h, ipiv_h, dev_info_array) else: From cef4690aa9c87df868435dd5dafbef3fb05507cd Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 9 Jan 2024 13:11:39 +0100 Subject: [PATCH 50/55] Add a new dpnp.linalg.det impl and refresh dpnp_utils_linalg --- dpnp/linalg/dpnp_iface_linalg.py | 6 +- dpnp/linalg/dpnp_utils_linalg.py | 155 ++++++++++++++---- .../cupy/linalg_tests/test_norms.py | 1 - 3 files changed, 123 insertions(+), 39 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 45b159428716..ab22482a81b6 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -50,6 +50,7 @@ from .dpnp_utils_linalg import ( check_stacked_2d, check_stacked_square, + dpnp_det, dpnp_eigh, dpnp_slogdet, dpnp_solve, @@ -195,9 +196,10 @@ def det(a): """ dpnp.check_supported_arrays_type(a) + check_stacked_2d(a) + check_stacked_square(a) - sign, logdet = slogdet(a) - return sign * dpnp.exp(logdet) + return dpnp_det(a) def eig(x1): diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 878b4fc480bb..86227159c187 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -37,6 +37,7 @@ __all__ = [ "check_stacked_2d", "check_stacked_square", + "dpnp_det", "dpnp_eigh", "dpnp_slogdet", "dpnp_solve", @@ -53,6 +54,50 @@ } +def _calculate_determinant_sign(ipiv, diag, res_type, n): + """ + Calculate the sign of the determinant based on row exchanges and diagonal values. + + Parameters + ----------- + ipiv : {dpnp.ndarray, usm_ndarray} + The pivot indices from LU decomposition. + diag : {dpnp.ndarray, usm_ndarray} + The diagonal elements of the LU decomposition matrix. + res_type : dpnp.dtype + The common data type for linalg operations. + n : int + The size of the last two dimensions of the array. + + Returns + ------- + sign : {dpnp_array, usm_ndarray} + The sign of the determinant. + + """ + + # Checks for row exchanges in LU decomposition affecting determinant sign. + ipiv_diff = ipiv != dpnp.arange( + 1, n + 1, usm_type=ipiv.usm_type, sycl_queue=ipiv.sycl_queue + ) + + # Counts row exchanges from 'ipiv_diff'. 
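+    # Each position where the 1-based pivot index differs from its row number
+    # corresponds to a row swap performed by getrf; an odd number of swaps
+    # flips the sign of the determinant.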
+ non_zero = dpnp.count_nonzero(ipiv_diff, axis=-1) + + # For floating types, adds count of negative diagonal elements + # to determine determinant sign. + if dpnp.issubdtype(res_type, dpnp.floating): + non_zero += dpnp.count_nonzero(diag < 0, axis=-1) + + sign = (non_zero % 2) * -2 + 1 + + # For complex types, compute sign from the phase of diagonal elements. + if dpnp.issubdtype(res_type, dpnp.complexfloating): + sign = sign * dpnp.prod(diag / dpnp.abs(diag), axis=-1) + + return sign.astype(res_type) + + def _real_type(dtype, device=None): """ Returns the real data type corresponding to a given dpnp data type. @@ -84,7 +129,7 @@ def check_stacked_2d(*arrays): Parameters ---------- - arrays : {dpnp_array, usm_ndarray} + arrays : {dpnp.ndarray, usm_ndarray} A sequence of input arrays to check for dimensionality. Returns @@ -123,7 +168,7 @@ def check_stacked_square(*arrays): Parameters ---------- - arrays : {dpnp_array, usm_ndarray} + arrays : {dpnp.ndarray, usm_ndarray} A sequence of input arrays to check for square matrix shape. Returns @@ -148,8 +193,6 @@ def check_stacked_square(*arrays): def _common_type(*arrays): """ - _common_type(*arrays) - Common type for linear algebra operations. This function determines the common data type for linalg operations. @@ -160,12 +203,15 @@ def _common_type(*arrays): - The default floating-point data type is determined by the capabilities of the device on which `arrays` are created, as indicated by `dpnp.default_float_type()`. - Args: - *arrays (dpnp.ndarray): Input arrays. - - Returns: - dtype_common (dtype): The common data type for linalg operations. + Parameters + ---------- + arrays : {dpnp.ndarray, usm_ndarray} + A sequence of input arrays. + Returns + ------- + dtype_common : dpnp.dtype + The common data type for linalg operations. This returned value is applicable both as the precision to be used in linalg calls and as the dtype of (possibly complex) output(s). @@ -181,24 +227,27 @@ def _common_type(*arrays): def _common_inexact_type(default_dtype, *dtypes): """ - _common_inexact_type(default_dtype, *dtypes) - Determines the common 'inexact' data type for linear algebra operations. This function selects an 'inexact' data type appropriate for the device's capabilities. It defaults to `default_dtype` when provided types are not 'inexact'. - Args: - default_dtype: The default data type. This is determined by the capabilities of + Parameters + ---------- + default_dtype : dpnp.dtype + The default data type. This is determined by the capabilities of the device and is used when none of the provided types are 'inexact'. *dtypes: A variable number of data types to be evaluated to find the common 'inexact' type. - Returns: - dpnp.result_type (dtype) : The resultant 'inexact' data type for linalg operations, + Returns + ------- + dpnp.result_type : dpnp.dtype + The resultant 'inexact' data type for linalg operations, ensuring computational compatibility. """ + inexact_dtypes = [ dt if issubdtype(dt, dpnp.inexact) else default_dtype for dt in dtypes ] @@ -214,15 +263,15 @@ def _lu_factor(a, res_type): Parameters ---------- - a : (..., M, M) {dpnp.ndarray, usm_ndarray} - Input array containing the matrices to be decomposed. - res_type : dpnp.dtype - Specifies the data type of the result. - Acceptable data types are float32, float64, complex64, or complex128. + a : (..., M, M) {dpnp.ndarray, usm_ndarray} + Input array containing the matrices to be decomposed. + res_type : dpnp.dtype + Specifies the data type of the result. 
+ Acceptable data types are float32, float64, complex64, or complex128. Returns ------- - tuple: + tuple: lu_t : (..., N, N) {dpnp.ndarray, usm_ndarray} Combined 'L' and 'U' matrices from LU decomposition excluding the diagonal of 'L'. @@ -388,6 +437,54 @@ def _lu_factor(a, res_type): return (a_h, ipiv_h, dev_info_array) +def dpnp_det(a): + """ + dpnp_det(a) + + Returns the determinant of `a` array. + + """ + + a_usm_type = a.usm_type + a_sycl_queue = a.sycl_queue + + res_type = _common_type(a) + det_dtype = _real_type(res_type) + + a_shape = a.shape + shape = a_shape[:-2] + n = a_shape[-2] + + if a.size == 0: + # empty batch (result is empty, too) or empty matrices det([[]]) == 1 + det = dpnp.ones( + shape, + dtype=det_dtype, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + return det + + lu, ipiv, dev_info = _lu_factor(a, res_type) + + # Transposing 'lu' to swap the last two axes for compatibility + # with 'dpnp.diagonal' as it does not support 'axis1' and 'axis2' arguments. + # TODO: Replace with 'dpnp.diagonal(lu, axis1=-2, axis2=-1)' when supported. + lu_transposed = lu.transpose(-2, -1, *range(lu.ndim - 2)) + diag = dpnp.diagonal(lu_transposed) + + det = dpnp.prod(diag, axis=-1) + + sign = _calculate_determinant_sign(ipiv, diag, res_type, n) + + det = sign * det + det = det.astype(det_dtype, copy=False) + singular = dpnp.array([dev_info > 0]) + det = dpnp.where(singular, res_type.type(0), det) + + return det.reshape(shape) + + def dpnp_eigh(a, UPLO): """ dpnp_eigh(a, UPLO) @@ -689,22 +786,8 @@ def dpnp_slogdet(a): logdet = dpnp.log(dpnp.abs(diag)).sum(axis=-1) - # ipiv is 1-origin - non_zero = dpnp.count_nonzero( - ipiv - != dpnp.arange( - 1, n + 1, usm_type=ipiv.usm_type, sycl_queue=ipiv.sycl_queue - ), - axis=-1, - ) - if dpnp.issubdtype(res_type, dpnp.floating): - non_zero += dpnp.count_nonzero(diag < 0, axis=-1) - - sign = (non_zero % 2) * -2 + 1 - if dpnp.issubdtype(res_type, dpnp.complexfloating): - sign = sign * dpnp.prod(diag / dpnp.abs(diag), axis=-1) + sign = _calculate_determinant_sign(ipiv, diag, res_type, n) - sign = sign.astype(res_type) logdet = logdet.astype(logdet_dtype, copy=False) singular = dpnp.array([dev_info > 0]) return ( diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index bcf2c21d27f8..41d4830e84d9 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -4,7 +4,6 @@ import pytest import dpnp as cupy -from tests.helper import is_cpu_device from tests.third_party.cupy import testing From 67681bff019c8a8e53b09b34cb31c57a084c7f85 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 9 Jan 2024 13:20:04 +0100 Subject: [PATCH 51/55] Remove Limitations from dpnp_det and dpnp_slogdet docstings --- dpnp/linalg/dpnp_iface_linalg.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index ab22482a81b6..b7a56c4c7db2 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -167,11 +167,6 @@ def det(a): det : (...) dpnp.ndarray Determinant of `a`. - Limitations - ----------- - Parameter `a` is supported as :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. - Input array data types are limited by supported DPNP :ref:`Data types`. - See Also -------- :obj:`dpnp.linalg.slogdet` : Returns sign and logarithm of the determinant of an array. 
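For illustration, a minimal usage sketch of the det() API documented in this hunk
(not part of the patch; assumes a dpnp build that includes this series):

    import dpnp as dp

    a = dp.array([[1.0, 2.0], [3.0, 4.0]])
    print(dp.linalg.det(a))    # -2.0, up to floating-point rounding

    # Batched input: the determinant is computed over the last two axes.
    b = dp.array([[[1.0, 2.0], [3.0, 4.0]], [[2.0, 0.0], [0.0, 2.0]]])
    print(dp.linalg.det(b))    # [-2.  4.]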
@@ -674,17 +669,12 @@ def slogdet(a): Returns ------- - sign : (...) dpnp.ndarray - A number representing the sign of the determinant. For a real matrix, - this is 1, 0, or -1. For a complex matrix, this is a complex number - with absolute value 1 (i.e., it is on the unit circle), or else 0. - logabsdet : (...) dpnp.ndarray - The natural log of the absolute value of the determinant. - - Limitations - ----------- - Parameter `a` is supported as :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. - Input array data types are limited by supported DPNP :ref:`Data types`. + sign : (...) dpnp.ndarray + A number representing the sign of the determinant. For a real matrix, + this is 1, 0, or -1. For a complex matrix, this is a complex number + with absolute value 1 (i.e., it is on the unit circle), or else 0. + logabsdet : (...) dpnp.ndarray + The natural log of the absolute value of the determinant. See Also -------- From ed71f6b2b654142de6e1cf8b66fd657ae6c8f976 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 10 Jan 2024 12:41:15 +0100 Subject: [PATCH 52/55] Address remarks --- dpnp/backend/extensions/lapack/getrf.cpp | 11 +++++++++++ dpnp/backend/extensions/lapack/getrf_batch.cpp | 11 +++++++++++ dpnp/linalg/dpnp_utils_linalg.py | 12 +++++++++--- tests/test_linalg.py | 5 +++-- 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index 13f759e4e4d8..faebb9f7d42f 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -175,11 +175,22 @@ std::pair "Execution queue is not compatible with allocation queues"); } + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(a_array, ipiv_array)) { + throw py::value_error("The input array and the array of pivot indices " + "are overlapping segments of memory"); + } + bool is_a_array_c_contig = a_array.is_c_contiguous(); + bool is_ipiv_array_c_contig = ipiv_array.is_c_contiguous(); if (!is_a_array_c_contig) { throw py::value_error("The input array " "must be C-contiguous"); } + if (!is_ipiv_array_c_contig) { + throw py::value_error("The array of pivot indices " + "must be C-contiguous"); + } auto array_types = dpctl_td_ns::usm_ndarray_types(); int a_array_type_id = diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index 4bfbca06a11a..0944d7437aed 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -196,11 +196,22 @@ std::pair "Execution queue is not compatible with allocation queues"); } + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(a_array, ipiv_array)) { + throw py::value_error("The input array and the array of pivot indices " + "are overlapping segments of memory"); + } + bool is_a_array_c_contig = a_array.is_c_contiguous(); + bool is_ipiv_array_c_contig = ipiv_array.is_c_contiguous(); if (!is_a_array_c_contig) { throw py::value_error("The input array " "must be C-contiguous"); } + if (!is_ipiv_array_c_contig) { + throw py::value_error("The array of pivot indices " + "must be C-contiguous"); + } auto array_types = dpctl_td_ns::usm_ndarray_types(); int a_array_type_id = diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 86227159c187..ba402e8ccb7a 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -306,6 +306,7 @@ def _lu_factor(a, res_type): 
ipiv_h = dpnp.empty( (batch_size, n), dtype=dpnp.int64, + order="C", usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) @@ -369,6 +370,7 @@ def _lu_factor(a, res_type): ipiv_vecs[i] = dpnp.empty( (n,), dtype=dpnp.int64, + order="C", usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) @@ -410,7 +412,11 @@ def _lu_factor(a, res_type): ) ipiv_h = dpnp.empty( - n, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue + n, + dtype=dpnp.int64, + order="C", + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, ) dev_info_h = [0] @@ -479,7 +485,7 @@ def dpnp_det(a): det = sign * det det = det.astype(det_dtype, copy=False) - singular = dpnp.array([dev_info > 0]) + singular = dev_info > 0 det = dpnp.where(singular, res_type.type(0), det) return det.reshape(shape) @@ -789,7 +795,7 @@ def dpnp_slogdet(a): sign = _calculate_determinant_sign(ipiv, diag, res_type, n) logdet = logdet.astype(logdet_dtype, copy=False) - singular = dpnp.array([dev_info > 0]) + singular = dev_info > 0 return ( dpnp.where(singular, res_type.type(0), sign).reshape(shape), dpnp.where(singular, logdet_dtype.type("-inf"), logdet).reshape(shape), diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 8c1085e64684..735cf1a8918c 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -122,8 +122,9 @@ class TestDet: "4D_array", ], ) - def test_det(self, array): - a = numpy.array(array) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_det(self, array, dtype): + a = numpy.array(array, dtype=dtype) ia = inp.array(a) result = inp.linalg.det(ia) expected = numpy.linalg.det(a) From 75fa23a62a0149dcf608633567fa19e555fa8f5a Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 11 Jan 2024 12:45:39 +0100 Subject: [PATCH 53/55] Remove det_dtype variable and use the abs val of diag for det --- dpnp/linalg/dpnp_utils_linalg.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index ba402e8ccb7a..bd44b9742534 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -455,7 +455,6 @@ def dpnp_det(a): a_sycl_queue = a.sycl_queue res_type = _common_type(a) - det_dtype = _real_type(res_type) a_shape = a.shape shape = a_shape[:-2] @@ -465,7 +464,7 @@ def dpnp_det(a): # empty batch (result is empty, too) or empty matrices det([[]]) == 1 det = dpnp.ones( shape, - dtype=det_dtype, + dtype=res_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) @@ -479,12 +478,12 @@ def dpnp_det(a): lu_transposed = lu.transpose(-2, -1, *range(lu.ndim - 2)) diag = dpnp.diagonal(lu_transposed) - det = dpnp.prod(diag, axis=-1) + det = dpnp.prod(dpnp.abs(diag), axis=-1) sign = _calculate_determinant_sign(ipiv, diag, res_type, n) det = sign * det - det = det.astype(det_dtype, copy=False) + det = det.astype(res_type, copy=False) singular = dev_info > 0 det = dpnp.where(singular, res_type.type(0), det) From e9bdcd60bb47e4a81bc435f9d568e071336643bb Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 11 Jan 2024 12:46:44 +0100 Subject: [PATCH 54/55] Expand cupy tests for dpnp.linalg.det() --- .../cupy/linalg_tests/test_norms.py | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index 41d4830e84d9..2ed49d16057a 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -4,81 +4,90 @@ import pytest 
import dpnp as cupy +from tests.helper import is_cpu_device from tests.third_party.cupy import testing +# TODO: Remove the use of fixture for all tests in this file +# when dpnp.prod() will support complex dtypes on Gen9 +@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestDet(unittest.TestCase): - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det(self, xp, dtype): a = testing.shaped_arange((2, 2), xp, dtype) + 1 return xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_3(self, xp, dtype): a = testing.shaped_arange((2, 2, 2), xp, dtype) + 1 return xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_4(self, xp, dtype): a = testing.shaped_arange((2, 2, 2, 2), xp, dtype) + 1 return xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_empty_batch(self, xp, dtype): a = xp.empty((2, 0, 3, 3), dtype=dtype) return xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_empty_matrix(self, xp, dtype): a = xp.empty((0, 0), dtype=dtype) return xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_empty_matrices(self, xp, dtype): a = xp.empty((2, 3, 0, 0), dtype=dtype) return xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") def test_det_different_last_two_dims(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2, 3, 2), xp, dtype) with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") def test_det_different_last_two_dims_empty_batch(self, dtype): for xp in (numpy, cupy): a = xp.empty((0, 3, 2), dtype=dtype) with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") def test_det_one_dim(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2,), xp, dtype) with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") def test_det_zero_dim(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((), xp, dtype) with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + # TODO: remove skipif when MKLD-16626 is resolved + # _getrf_batch does not raise an error with singular matrices. + # Skip running on cpu because dpnp uses _getrf_batch only on cpu. 
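+    # The skip applies only to this singular-matrix case; the other det tests
+    # in the class are not device-gated.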
+ @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_singular(self, xp, dtype): a = xp.zeros((2, 3, 3), dtype=dtype) return xp.linalg.det(a) +@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestSlogdet(unittest.TestCase): @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) From 549b5daee9b0873c23b600cdc413fe603af8c734 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 11 Jan 2024 16:21:17 +0100 Subject: [PATCH 55/55] Update TestDet and TestSlogdet --- tests/test_linalg.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 735cf1a8918c..cb780abd9253 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -106,6 +106,9 @@ def test_cond(arr, p): class TestDet: + # TODO: Remove the use of fixture for test_det + # when dpnp.prod() will support complex dtypes on Gen9 + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize( "array", [ @@ -690,6 +693,9 @@ def test_solve_errors(self): class TestSlogdet: + # TODO: Remove the use of fixture for test_slogdet_2d and test_slogdet_3d + # when dpnp.prod() will support complex dtypes on Gen9 + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) def test_slogdet_2d(self, dtype): a_np = numpy.array([[1, 2], [3, 4]], dtype=dtype) @@ -701,6 +707,7 @@ def test_slogdet_2d(self, dtype): assert_allclose(sign_expected, sign_result) assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) def test_slogdet_3d(self, dtype): a_np = numpy.array(