From 308b8cfb415b6cbcf36b7f374cd988993a33bbfd Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 23 Oct 2023 13:57:45 +0200 Subject: [PATCH 01/55] Add a new impl of dpnp.linalg._lu_factor --- dpnp/backend/extensions/lapack/CMakeLists.txt | 1 + dpnp/backend/extensions/lapack/getrf.cpp | 181 ++++++++++++++++++ dpnp/backend/extensions/lapack/getrf.hpp | 51 +++++ dpnp/backend/extensions/lapack/lapack_py.cpp | 8 + .../extensions/lapack/types_matrix.hpp | 26 +++ dpnp/linalg/dpnp_utils_linalg.py | 99 ++++++++++ 6 files changed, 366 insertions(+) create mode 100644 dpnp/backend/extensions/lapack/getrf.cpp create mode 100644 dpnp/backend/extensions/lapack/getrf.hpp diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt index 0c90b4f0ca52..6dbb36b67df9 100644 --- a/dpnp/backend/extensions/lapack/CMakeLists.txt +++ b/dpnp/backend/extensions/lapack/CMakeLists.txt @@ -29,6 +29,7 @@ pybind11_add_module(${python_module_name} MODULE lapack_py.cpp heevd.cpp syevd.cpp + getrf.cpp ) if (WIN32) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp new file mode 100644 index 000000000000..0b988aedb794 --- /dev/null +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -0,0 +1,181 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "getrf.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*getrf_impl_fn_ptr_t)(sycl::queue, + const std::int64_t, + char *, + std::int64_t, + std::int64_t *, + std::vector &, + const std::vector &); + +static getrf_impl_fn_ptr_t getrf_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event getrf_impl(sycl::queue exec_q, + const std::int64_t n, + char *in_a, + std::int64_t lda, + std::int64_t *ipiv, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + + const std::int64_t scratchpad_size = + oneapi::mkl::lapack::getrf_scratchpad_size(exec_q, n, n, lda); + T *scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + + sycl::event getrf_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + getrf_event = oneapi::mkl::lapack::getrf( + exec_q, + n, // The order of the matrix A; (0 ≤ n). + n, // The order of the matrix A; (0 ≤ n). + a, // Pointer to the n-by-n coefficient matrix A. + lda, // The leading dimension of a. + ipiv, + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results + scratchpad_size, depends); + } catch (mkl_lapack::exception const &e) { + error_msg + << "Unexpected MKL exception caught during getrf() call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + info = e.info(); + } catch (sycl::exception const &e) { + error_msg << "Unexpected SYCL exception caught during getrf() call:\n" + << e.what(); + info = -1; + } + + if (info != 0) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(getrf_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + return getrf_event; +} + +sycl::event getrf(sycl::queue q, + const std::int64_t n, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + const std::vector &depends = {}) +{ + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + + getrf_impl_fn_ptr_t getrf_fn = getrf_dispatch_vector[a_array_type_id]; + if (getrf_fn == nullptr) { + throw py::value_error( + "No getrf implementation defined for the provided type " + "of the input matrix."); + } + + char *a_array_data = a_array.get_data(); + const std::int64_t lda = std::max(1UL, n); + + // check valid ipiv + + char *ipiv_array_data = ipiv_array.get_data(); + std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data); + + // std::vector ipiv(n); + // std::int64_t* d_ipiv = sycl::malloc_device(n, q); + + std::vector host_task_events; + sycl::event getrf_ev = + getrf_fn(q, n, a_array_data, lda, d_ipiv, host_task_events, depends); + + return getrf_ev; +} + +template +struct GetrfContigFactory +{ + fnT get() + { + if constexpr 
(types::GetrfTypePairSupportFactory::is_defined) { + return getrf_impl; + } + else { + return nullptr; + } + } +}; + +void init_getrf_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(getrf_dispatch_vector); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/getrf.hpp b/dpnp/backend/extensions/lapack/getrf.hpp new file mode 100644 index 000000000000..0fabf213960e --- /dev/null +++ b/dpnp/backend/extensions/lapack/getrf.hpp @@ -0,0 +1,51 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include +#include + +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +sycl::event getrf(sycl::queue exec_q, + const std::int64_t n, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + const std::vector &depends); + +extern void init_getrf_dispatch_vector(void); +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp index 97b67d59e24e..92f88c560159 100644 --- a/dpnp/backend/extensions/lapack/lapack_py.cpp +++ b/dpnp/backend/extensions/lapack/lapack_py.cpp @@ -30,6 +30,7 @@ #include #include +#include "getrf.hpp" #include "heevd.hpp" #include "syevd.hpp" @@ -40,6 +41,7 @@ namespace py = pybind11; void init_dispatch_vectors(void) { lapack_ext::init_syevd_dispatch_vector(); + lapack_ext::init_getrf_dispatch_vector(); } // populate dispatch tables @@ -66,4 +68,10 @@ PYBIND11_MODULE(_lapack_impl, m) py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"), py::arg("eig_vecs"), py::arg("eig_vals"), py::arg("depends") = py::list()); + + m.def("_getrf", &lapack_ext::getrf, + "Call `getrf` from OneMKL LAPACK library to return " + "the LU factorization of a general n x n matrix", + py::arg("sycl_queue"), py::arg("n"), py::arg("a_array"), + py::arg("ipiv_array"), py::arg("depends") = py::list()); } diff --git a/dpnp/backend/extensions/lapack/types_matrix.hpp b/dpnp/backend/extensions/lapack/types_matrix.hpp index 3cab18d3c63d..b5276e5f1318 100644 --- a/dpnp/backend/extensions/lapack/types_matrix.hpp +++ b/dpnp/backend/extensions/lapack/types_matrix.hpp @@ -80,6 +80,32 @@ struct SyevdTypePairSupportFactory // fall-through dpctl_td_ns::NotDefinedEntry>::is_defined; }; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::getrf + * function. + * + * @tparam T Type of array containing input matrix A, + * as well as the output array for storing the LU factorization. + */ +template +struct GetrfTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; } // namespace types } // namespace lapack } // namespace ext diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 54c01c20248e..98ac1ec8024a 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -164,3 +164,102 @@ def dpnp_eigh(a, UPLO): ht_copy_ev.wait() return w, out_v + + +def _lu_factor(a, dtype=dpnp.float32): + """Compute pivoted LU decomposition. + + Decompose a given batch of square matrices. Inputs and outputs are + transposed. + + Args: + a (dpnp.ndarray): The input matrix with dimension ``(..., N, N)``. + The dimension condition is not checked. + dtype (dpnp.dtype): float32, float64, complex64, or complex128. + + Returns: + tuple: + lu_t (dpnp.ndarray): + ``L`` without its unit diagonal and ``U`` with + dimension ``(..., N, N)``. + piv (dpnp.ndarray): + 1-origin pivot indices with dimension + ``(..., N)``. 
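+
+    Example:
+        Illustrative sketch only; the values below are assumed, not part
+        of this change:
+
+        >>> a = dpnp.array([[4., 3.], [6., 3.]], dtype=dpnp.float32)
+        >>> lu, piv = _lu_factor(a)
+        >>> # ``lu`` packs L (without its unit diagonal) and U together;
+        >>> # ``piv`` holds the 1-origin pivot indices returned by getrf.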
+ + See Also + -------- + :obj:`scipy.linalg.lu_factor` + + """ + + a_usm_arr = dpnp.get_usm_ndarray(a) + + # TODO: use dpnp.linalg.LinAlgError + if a.ndim < 2: + raise ValueError( + f"{a.ndim}-dimensional array given. The input " + "array must be at least two-dimensional" + ) + + n, m = a.shape[-2:] + # TODO: use dpnp.linalg.LinAlgError + if m != n: + raise ValueError("Last 2 dimensions of the input array must be square") + + # orig_shape = a.shape + + a_order = "C" if a.flags.c_contiguous else "F" + + exec_q = a.sycl_queue + if exec_q is None: + raise ValueError( + "Execution placement can not be unambiguously inferred " + "from input arguments." + ) + + # TODO: Use linalg_common_type from #1598 + if dpnp.issubdtype(a.dtype, dpnp.floating): + res_type = ( + a.dtype if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 + ) + elif dpnp.issubdtype(a.dtype, dpnp.complexfloating): + res_type = ( + a.dtype if exec_q.sycl_device.has_aspect_fp64 else dpnp.complex64 + ) + else: + res_type = ( + dpnp.float64 if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 + ) + + a_h = dpnp.empty_like(a, order="C", dtype=res_type) + ipiv_h = dpnp.empty( + n, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue + ) + + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a.sycl_queue + ) + + lapack_ev = li._getrf( + exec_q, n, a_h.get_array(), ipiv_h.get_array(), [a_copy_ev] + ) + + if a_order != "C": + # need to align order of the result of solutions with the + # input array of multiple dependent variables + a_h_f = dpnp.empty_like(a_h, order=a_order) + ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_h.get_array(), + dst=a_h_f.get_array(), + sycl_queue=a.sycl_queue, + depends=[lapack_ev], + ) + ht_copy_out_ev.wait() + out_v = (a_h_f, ipiv_h) + else: + out_v = (a_h, ipiv_h) + + lapack_ev.wait() + a_ht_copy_ev.wait() + + return out_v From 111934144b2d141d5d5836c672890c199d38a2ec Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 25 Oct 2023 18:32:20 +0200 Subject: [PATCH 02/55] Get dev_info_array after calling getrf --- dpnp/backend/extensions/lapack/getrf.cpp | 24 ++++++++++++++++---- dpnp/backend/extensions/lapack/getrf.hpp | 1 + dpnp/backend/extensions/lapack/lapack_py.cpp | 3 ++- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index 0b988aedb794..d48aee69693d 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -51,6 +51,7 @@ typedef sycl::event (*getrf_impl_fn_ptr_t)(sycl::queue, char *, std::int64_t, std::int64_t *, + std::int64_t *, std::vector &, const std::vector &); @@ -62,6 +63,7 @@ static sycl::event getrf_impl(sycl::queue exec_q, char *in_a, std::int64_t lda, std::int64_t *ipiv, + std::int64_t *dev_info, std::vector &host_task_events, const std::vector &depends) { @@ -106,9 +108,19 @@ static sycl::event getrf_impl(sycl::queue exec_q, if (scratchpad != nullptr) { sycl::free(scratchpad, exec_q); } - throw std::runtime_error(error_msg.str()); + + if (info < 0) { + throw std::runtime_error(error_msg.str()); + } } + sycl::event write_info_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(getrf_event); + cgh.single_task([=]() { dev_info[0] = info; }); + }); + + host_task_events.push_back(write_info_event); + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(getrf_event); auto ctx = 
exec_q.get_context(); @@ -122,6 +134,7 @@ sycl::event getrf(sycl::queue q, const std::int64_t n, dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, + dpctl::tensor::usm_ndarray dev_info_array, const std::vector &depends = {}) { @@ -144,12 +157,13 @@ sycl::event getrf(sycl::queue q, char *ipiv_array_data = ipiv_array.get_data(); std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data); - // std::vector ipiv(n); - // std::int64_t* d_ipiv = sycl::malloc_device(n, q); + char *dev_info_array_data = dev_info_array.get_data(); + std::int64_t *d_dev_info = + reinterpret_cast(dev_info_array_data); std::vector host_task_events; - sycl::event getrf_ev = - getrf_fn(q, n, a_array_data, lda, d_ipiv, host_task_events, depends); + sycl::event getrf_ev = getrf_fn(q, n, a_array_data, lda, d_ipiv, d_dev_info, + host_task_events, depends); return getrf_ev; } diff --git a/dpnp/backend/extensions/lapack/getrf.hpp b/dpnp/backend/extensions/lapack/getrf.hpp index 0fabf213960e..583f4bf2d1e8 100644 --- a/dpnp/backend/extensions/lapack/getrf.hpp +++ b/dpnp/backend/extensions/lapack/getrf.hpp @@ -42,6 +42,7 @@ sycl::event getrf(sycl::queue exec_q, const std::int64_t n, dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, + dpctl::tensor::usm_ndarray dev_info_array, const std::vector &depends); extern void init_getrf_dispatch_vector(void); diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp index 92f88c560159..38320d205aa3 100644 --- a/dpnp/backend/extensions/lapack/lapack_py.cpp +++ b/dpnp/backend/extensions/lapack/lapack_py.cpp @@ -73,5 +73,6 @@ PYBIND11_MODULE(_lapack_impl, m) "Call `getrf` from OneMKL LAPACK library to return " "the LU factorization of a general n x n matrix", py::arg("sycl_queue"), py::arg("n"), py::arg("a_array"), - py::arg("ipiv_array"), py::arg("depends") = py::list()); + py::arg("ipiv_array"), py::arg("dev_info_array"), + py::arg("depends") = py::list()); } From 71c2e96a47f6fabb529be229f8fbd8cfe04bbe3a Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 25 Oct 2023 18:38:38 +0200 Subject: [PATCH 03/55] Add an extra dev_info return to _lu_factor --- dpnp/linalg/dpnp_utils_linalg.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 98ac1ec8024a..915000785717 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -32,7 +32,7 @@ import dpnp import dpnp.backend.extensions.lapack._lapack_impl as li -__all__ = ["dpnp_eigh"] +__all__ = ["dpnp_eigh", "_lu_factor"] _jobz = {"N": 0, "V": 1} _upper_lower = {"U": 0, "L": 1} @@ -231,20 +231,28 @@ def _lu_factor(a, dtype=dpnp.float32): dpnp.float64 if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 ) - a_h = dpnp.empty_like(a, order="C", dtype=res_type) + a_h = dpnp.empty_like(a, order="F", dtype=res_type) ipiv_h = dpnp.empty( n, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue ) + dev_info_h = dpnp.empty( + 1, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue + ) a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a.sycl_queue ) lapack_ev = li._getrf( - exec_q, n, a_h.get_array(), ipiv_h.get_array(), [a_copy_ev] + exec_q, + n, + a_h.get_array(), + ipiv_h.get_array(), + dev_info_h.get_array(), + [a_copy_ev], ) - if a_order != "C": + if a_order != "F": # need to align order of the result of solutions with the # 
input array of multiple dependent variables a_h_f = dpnp.empty_like(a_h, order=a_order) @@ -255,9 +263,9 @@ def _lu_factor(a, dtype=dpnp.float32): depends=[lapack_ev], ) ht_copy_out_ev.wait() - out_v = (a_h_f, ipiv_h) + out_v = (a_h_f, ipiv_h, dev_info_h) else: - out_v = (a_h, ipiv_h) + out_v = (a_h, ipiv_h, dev_info_h) lapack_ev.wait() a_ht_copy_ev.wait() From b24c8c590e1dd82cb8f93d5de82b475d61a6a99c Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 26 Oct 2023 11:05:38 +0200 Subject: [PATCH 04/55] qwe --- dpnp/linalg/dpnp_iface_linalg.py | 130 ++++++++++++++++-- qwe.py | 39 ++++++ tests/test_linalg.py | 14 +- .../cupy/linalg_tests/test_norms.py | 8 ++ 4 files changed, 174 insertions(+), 17 deletions(-) create mode 100644 qwe.py create mode 100644 tests/third_party/cupy/linalg_tests/test_norms.py diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index b41b96c70525..5815b1de4e1c 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -47,7 +47,7 @@ from dpnp.dpnp_utils import * from dpnp.linalg.dpnp_algo_linalg import * -from .dpnp_utils_linalg import dpnp_eigh +from .dpnp_utils_linalg import dpnp_eigh, _lu_factor __all__ = [ "cholesky", @@ -157,17 +157,19 @@ def det(input): Determinant of `input`. """ - x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) - if x1_desc: - if x1_desc.ndim < 2: - pass - elif x1_desc.shape[-1] == x1_desc.shape[-2]: - result_obj = dpnp_det(x1_desc).get_pyobj() - result = dpnp.convert_single_elem_array_to_scalar(result_obj) + # x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) + # if x1_desc: + # if x1_desc.ndim < 2: + # pass + # elif x1_desc.shape[-1] == x1_desc.shape[-2]: + # result_obj = dpnp_det(x1_desc).get_pyobj() + # result = dpnp.convert_single_elem_array_to_scalar(result_obj) - return result + # return result + + # return call_origin(numpy.linalg.det, input) - return call_origin(numpy.linalg.det, input) + return _lu_factor(input) def eig(x1): @@ -577,3 +579,111 @@ def svd(x1, full_matrices=True, compute_uv=True, hermitian=False): return call_origin( numpy.linalg.svd, x1, full_matrices, compute_uv, hermitian ) + +""" +def _lu_factor(a, dtype=dpnp.float32): + a_usm_arr = dpnp.get_usm_ndarray(a) + shape = a.shape[:-2] + + # TODO: use dpnp.linalg.LinAlgError + if a.ndim < 2: + raise ValueError( + f"{a.ndim}-dimensional array given. The input " + "array must be at least two-dimensional" + ) + + n, m = a.shape[-2:] + # TODO: use dpnp.linalg.LinAlgError + if m != n: + raise ValueError("Last 2 dimensions of the input array must be square") + + # orig_shape = a.shape + + a_order = "C" if a.flags.c_contiguous else "F" + + exec_q = a.sycl_queue + if exec_q is None: + raise ValueError( + "Execution placement can not be unambiguously inferred " + "from input arguments." 
+ ) + + # TODO: Use linalg_common_type from #1598 + if dpnp.issubdtype(a.dtype, dpnp.floating): + res_type = ( + a.dtype if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 + ) + elif dpnp.issubdtype(a.dtype, dpnp.complexfloating): + res_type = ( + a.dtype if exec_q.sycl_device.has_aspect_fp64 else dpnp.complex64 + ) + else: + res_type = ( + dpnp.float64 if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 + ) + + logdet_dtype = numpy.dtype(res_type) + dtype = numpy.dtype(res_type) #a.dtype + + a_h = dpnp.empty_like(a, order="F", dtype=res_type) + ipiv_h = dpnp.empty( + n, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue + ) + dev_info_h = dpnp.empty( + 1, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue + ) + + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a.sycl_queue + ) + + lapack_ev = li._getrf( + exec_q, n, a_h.get_array(), ipiv_h.get_array(),dev_info_h.get_array(), [a_copy_ev] + ) + + if a_order != "F": + # need to align order of the result of solutions with the + # input array of multiple dependent variables + a_h_f = dpnp.empty_like(a_h, order=a_order) + ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_h.get_array(), + dst=a_h_f.get_array(), + sycl_queue=a.sycl_queue, + depends=[lapack_ev], + ) + ht_copy_out_ev.wait() + out_v = (a_h_f, ipiv_h, dev_info_h) + else: + out_v = (a_h, ipiv_h, dev_info_h) + + lapack_ev.wait() + a_ht_copy_ev.wait() + + lu_np = dpnp.asnumpy(out_v[0]) + ipiv_np = dpnp.asnumpy(out_v[1]) + dev_info_np = dpnp.asnumpy(out_v[2]) + + + diag = numpy.diagonal(lu_np, axis1=-2, axis2=-1) + logdet = numpy.log(numpy.abs(diag)).sum(axis=-1) + + non_zero = numpy.count_nonzero(ipiv_np != numpy.arange(1, n + 1), axis=-1) + if dtype.kind == "f": + non_zero += numpy.count_nonzero(diag < 0, axis=-1) + + sign = (non_zero % 2) * -2 + 1 + if dtype.kind == "c": + sign = sign * numpy.prod(diag / numpy.abs(diag), axis=-1) + + sign = sign.astype(dtype) + logdet = logdet.astype(logdet_dtype, copy=False) + singular = dev_info_np > 0 + res_sign = numpy.where(singular, dtype.type(0), sign).reshape(shape) + res_logdet = numpy.where(singular, logdet_dtype.type('-inf'), logdet).reshape(shape) + + res = res_sign * numpy.exp(res_logdet) + + return res + # return out_v + +""" diff --git a/qwe.py b/qwe.py new file mode 100644 index 000000000000..ab03b4a3b5ad --- /dev/null +++ b/qwe.py @@ -0,0 +1,39 @@ +# import dpnp +# from dpnp.linalg. 
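+# Scratch comparison script: the commented-out block below checks
+# dpnp.linalg.svd against numpy.linalg.svd, and the active block at the
+# bottom compares _lu_factor with scipy.linalg.lu_factor on a 2x2 matrix.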
+# import numpy + +# a = numpy.arange(1,49,dtype='f4').reshape(2,2,3,4) +# a = numpy.arange(1,5,dtype='f4').reshape(2,2) +# a_dp = dpnp.array(a) +# print(numpy.linalg.svd(a,compute_uv=False)) +# print("===================") +# print(dpnp.linalg.svd(a_dp,compute_uv=False)) +# print("===================") +# a_dp = dpnp.array(a,device='cpu') +# print(dpnp.linalg.svd(a_dp,full_matrices=False)) + +# print(numpy.linalg.svd(a,full_matrices=False)) +# res_np = numpy.linalg.svd(a) +# print(res_np[0].shape) +# print(res_np[1].shape) +# print(res_np[2].shape) +# print("===================") +# print(dpnp.linalg.svd(a_dp, full_matrices=False)) +# res_dp = dpnp.linalg.svd(a_dp) +# print(res_dp[0].shape) +# print(res_dp[1].shape) +# print(res_dp[2].shape) +# print(a_dp) + + +from dpnp.linalg.dpnp_utils_linalg import _lu_factor +import dpnp +import numpy +import scipy + +a = numpy.array([[1, 2], [3, 4]],dtype="f4",order='C') +a_dp = dpnp.array(a) + +print(scipy.linalg.lu_factor(a)) +print("===========================") +print(_lu_factor(a_dp)) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 7ce774d0abd6..89361795a6e2 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -106,18 +106,18 @@ def test_cond(arr, p): [[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]], - [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], - [ - [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], - [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], - ], + # [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], + # [ + # [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], + # [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], + # ], ], ids=[ "[[0, 0], [0, 0]]", "[[1, 2], [1, 2]]", "[[1, 2], [3, 4]]", - "[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]", - "[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]", + # "[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]", + # "[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]", ], ) def test_det(array): diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py new file mode 100644 index 000000000000..d4e95a5641f0 --- /dev/null +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -0,0 +1,8 @@ +import unittest + +import numpy +import pytest + +import dpnp as cupy +from tests.helper import has_support_aspect64 +from tests.third_party.cupy import testing From 500df36eb3b8de741500698b7b9ef00f81db1cb8 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 31 Oct 2023 11:29:05 +0100 Subject: [PATCH 05/55] Add a logic for a.ndim > 2 in _lu_factor --- dpnp/linalg/dpnp_utils_linalg.py | 134 ++++++++++++++++++++----------- 1 file changed, 85 insertions(+), 49 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 915000785717..99e967a14aab 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -166,7 +166,7 @@ def dpnp_eigh(a, UPLO): return w, out_v -def _lu_factor(a, dtype=dpnp.float32): +def _lu_factor(a, res_type): """Compute pivoted LU decomposition. Decompose a given batch of square matrices. Inputs and outputs are @@ -192,8 +192,6 @@ def _lu_factor(a, dtype=dpnp.float32): """ - a_usm_arr = dpnp.get_usm_ndarray(a) - # TODO: use dpnp.linalg.LinAlgError if a.ndim < 2: raise ValueError( @@ -217,57 +215,95 @@ def _lu_factor(a, dtype=dpnp.float32): "from input arguments." 
) - # TODO: Use linalg_common_type from #1598 - if dpnp.issubdtype(a.dtype, dpnp.floating): - res_type = ( - a.dtype if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 + if a.ndim > 2: + orig_shape = a.shape + a = a.reshape(-1, n, n) + batch_size = a.shape[0] + a_usm_arr = dpnp.get_usm_ndarray(a) + + a_vecs = [None] * batch_size + ipiv_h = dpnp.empty( + (batch_size, n), + dtype=dpnp.int64, + usm_type=a.usm_type, + sycl_queue=a.sycl_queue, ) - elif dpnp.issubdtype(a.dtype, dpnp.complexfloating): - res_type = ( - a.dtype if exec_q.sycl_device.has_aspect_fp64 else dpnp.complex64 + dev_info_h = dpnp.empty( + (batch_size,), + dtype=dpnp.int64, + usm_type=a.usm_type, + sycl_queue=a.sycl_queue, ) + a_ht_copy_ev = [None] * batch_size + ht_lapack_ev = [None] * batch_size + + for i in range(batch_size): + a_vecs[i] = dpnp.empty_like(a[i], order="F", dtype=res_type) + a_ht_copy_ev[i], a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr[i], + dst=a_vecs[i].get_array(), + sycl_queue=a.sycl_queue, + ) + + ht_lapack_ev[i] = li._getrf( + exec_q, + n, + a_vecs[i].get_array(), + ipiv_h[i].get_array(), + dev_info_h[i].get_array(), + [a_copy_ev], + ) + + for i in range(batch_size): + ht_lapack_ev[i].wait() + a_ht_copy_ev[i].wait() + + out_v = dpnp.array(a_vecs, order=a_order).reshape(orig_shape) + out_ipiv = ipiv_h.reshape(orig_shape[:-1]) + out_dev_info = dev_info_h.reshape(orig_shape[:-2]) + + return (out_v, out_ipiv, out_dev_info) + else: - res_type = ( - dpnp.float64 if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 + a_usm_arr = dpnp.get_usm_ndarray(a) + + a_h = dpnp.empty_like(a, order="F", dtype=res_type) + ipiv_h = dpnp.empty( + n, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue + ) + dev_info_h = dpnp.empty( + 1, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue ) - a_h = dpnp.empty_like(a, order="F", dtype=res_type) - ipiv_h = dpnp.empty( - n, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue - ) - dev_info_h = dpnp.empty( - 1, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue - ) - - a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a.sycl_queue - ) - - lapack_ev = li._getrf( - exec_q, - n, - a_h.get_array(), - ipiv_h.get_array(), - dev_info_h.get_array(), - [a_copy_ev], - ) - - if a_order != "F": - # need to align order of the result of solutions with the - # input array of multiple dependent variables - a_h_f = dpnp.empty_like(a_h, order=a_order) - ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_h.get_array(), - dst=a_h_f.get_array(), - sycl_queue=a.sycl_queue, - depends=[lapack_ev], + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a.sycl_queue ) - ht_copy_out_ev.wait() - out_v = (a_h_f, ipiv_h, dev_info_h) - else: - out_v = (a_h, ipiv_h, dev_info_h) - lapack_ev.wait() - a_ht_copy_ev.wait() + lapack_ev = li._getrf( + exec_q, + n, + a_h.get_array(), + ipiv_h.get_array(), + dev_info_h.get_array(), + [a_copy_ev], + ) + + if a_order != "F": + # need to align order of the result of solutions with the + # input array of multiple dependent variables + a_h_f = dpnp.empty_like(a_h, order=a_order) + ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_h.get_array(), + dst=a_h_f.get_array(), + sycl_queue=a.sycl_queue, + depends=[lapack_ev], + ) + ht_copy_out_ev.wait() + out_v = (a_h_f, ipiv_h, dev_info_h) + else: + out_v = (a_h, ipiv_h, dev_info_h) + + 
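+        # Block here so the factorization and the host-side copy of the
+        # input have finished before the result arrays are handed back.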
lapack_ev.wait() + a_ht_copy_ev.wait() - return out_v + return out_v From b35e28268269df4c50a0f4c2bac0a22c484d9fb4 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 31 Oct 2023 11:31:23 +0100 Subject: [PATCH 06/55] Add an implementation of dpnp.linalg.slogdet --- dpnp/linalg/dpnp_iface_linalg.py | 161 +++++++++++++++++-------------- 1 file changed, 86 insertions(+), 75 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 5815b1de4e1c..18d8f359ae42 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -47,7 +47,7 @@ from dpnp.dpnp_utils import * from dpnp.linalg.dpnp_algo_linalg import * -from .dpnp_utils_linalg import dpnp_eigh, _lu_factor +from .dpnp_utils_linalg import _lu_factor, dpnp_eigh __all__ = [ "cholesky", @@ -63,6 +63,7 @@ "norm", "qr", "svd", + "slogdet", ] @@ -580,10 +581,39 @@ def svd(x1, full_matrices=True, compute_uv=True, hermitian=False): numpy.linalg.svd, x1, full_matrices, compute_uv, hermitian ) -""" -def _lu_factor(a, dtype=dpnp.float32): - a_usm_arr = dpnp.get_usm_ndarray(a) - shape = a.shape[:-2] + +def slogdet(a): + """Returns sign and logarithm of the determinant of an array. + + It calculates the natural logarithm of the determinant of a given value. + + Args: + a (dpnp.ndarray): The input matrix with dimension ``(..., N, N)``. + + Returns: + tuple of :class:`~dpnp.ndarray`: + It returns a tuple ``(sign, logdet)``. ``sign`` represents each + sign of the determinant as a real number ``0``, ``1`` or ``-1``. + 'logdet' represents the natural logarithm of the absolute of the + determinant. + If the determinant is zero, ``sign`` will be ``0`` and ``logdet`` + will be ``-inf``. + The shapes of both ``sign`` and ``logdet`` are equal to + ``a.shape[:-2]``. + + .. warning:: + This function calls one or more cuSOLVER routine(s) which may yield + invalid results if input conditions are not met. + To detect these invalid results, you can set the `linalg` + configuration to a value that is not `ignore` in + :func:`cupyx.errstate` or :func:`cupyx.seterr`. + + .. warning:: + To produce the same results as :func:`numpy.linalg.slogdet` for + singular inputs, set the `linalg` configuration to `raise`. + + .. seealso:: :func:`numpy.linalg.slogdet` + """ # TODO: use dpnp.linalg.LinAlgError if a.ndim < 2: @@ -597,17 +627,8 @@ def _lu_factor(a, dtype=dpnp.float32): if m != n: raise ValueError("Last 2 dimensions of the input array must be square") - # orig_shape = a.shape - - a_order = "C" if a.flags.c_contiguous else "F" - exec_q = a.sycl_queue - if exec_q is None: - raise ValueError( - "Execution placement can not be unambiguously inferred " - "from input arguments." 
- ) - + # dtype, sign_dtype = _util.linalg_common_type(a) # TODO: Use linalg_common_type from #1598 if dpnp.issubdtype(a.dtype, dpnp.floating): res_type = ( @@ -622,68 +643,58 @@ def _lu_factor(a, dtype=dpnp.float32): dpnp.float64 if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 ) - logdet_dtype = numpy.dtype(res_type) - dtype = numpy.dtype(res_type) #a.dtype - - a_h = dpnp.empty_like(a, order="F", dtype=res_type) - ipiv_h = dpnp.empty( - n, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue - ) - dev_info_h = dpnp.empty( - 1, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue - ) - - a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a.sycl_queue - ) - - lapack_ev = li._getrf( - exec_q, n, a_h.get_array(), ipiv_h.get_array(),dev_info_h.get_array(), [a_copy_ev] + res_type = dpnp.dtype(res_type) + logdet_dtype = dpnp.dtype(res_type.char.lower()) + + a_shape = a.shape + shape = a_shape[:-2] + n = a_shape[-2] + + if a.size == 0: + # empty batch (result is empty, too) or empty matrices det([[]]) == 1 + sign = dpnp.ones(shape, res_type) + logdet = dpnp.zeros(shape, logdet_dtype) + return sign, logdet + + lu, ipiv, dev_info = _lu_factor(a, res_type) + + # dev_info < 0 means illegal value (in dimensions, strides, and etc.) that + # should never happen even if the matrix contains nan or inf. + # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for + # debugging purposes. + + # Transposing 'lu' to swap the last two axes for compatibility + # with 'dpnp.diagonal' as it does not support 'axis1' and 'axis2' arguments. + # TODO: Replace with 'dpnp.diagonal(lu, axis1=-2, axis2=-1)' when supported. + lu_transposed = lu.transpose(-2, -1, *range(lu.ndim - 2)) + diag = dpnp.diagonal(lu_transposed) + + logdet = dpnp.log(dpnp.abs(diag)).sum(axis=-1) + + # Transposing 'ipiv' and reshaping 'arange_values' for element-wise comparison + # as 'dpnp.count_nonzero' does not support the 'axis' parameter. + ipiv_transposed = ipiv.transpose(-1, *range(ipiv.ndim - 1)) + arange_shape = [1] * a.ndim + arange_shape[-2] = n + arange_values = dpnp.arange(1, n + 1, sycl_queue=exec_q).reshape( + arange_shape ) - if a_order != "F": - # need to align order of the result of solutions with the - # input array of multiple dependent variables - a_h_f = dpnp.empty_like(a_h, order=a_order) - ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_h.get_array(), - dst=a_h_f.get_array(), - sycl_queue=a.sycl_queue, - depends=[lapack_ev], - ) - ht_copy_out_ev.wait() - out_v = (a_h_f, ipiv_h, dev_info_h) - else: - out_v = (a_h, ipiv_h, dev_info_h) - - lapack_ev.wait() - a_ht_copy_ev.wait() - - lu_np = dpnp.asnumpy(out_v[0]) - ipiv_np = dpnp.asnumpy(out_v[1]) - dev_info_np = dpnp.asnumpy(out_v[2]) + # ipiv is 1-origin + # TODO: Replace with 'dpnp.count_nonzero(ipiv != dpnp.arange(1, n + 1), axis=-1)' + # when supported. 
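+    # Every pivot entry that differs from its (1-based) row index records
+    # one row interchange, and each interchange flips the sign of det(P);
+    # for real types a negative diagonal entry of U flips the overall
+    # sign once more, so det(A) carries a factor of (-1) ** non_zero.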
+ non_zero = dpnp.count_nonzero(ipiv_transposed != arange_values) + if res_type.kind == "f": + non_zero += dpnp.count_nonzero(diag < 0) + sign = -(1 ** (non_zero % 2)) + if res_type.kind == "c": + sign = sign * dpnp.prod(diag / dpnp.abs(diag), axis=-1) - diag = numpy.diagonal(lu_np, axis1=-2, axis2=-1) - logdet = numpy.log(numpy.abs(diag)).sum(axis=-1) - - non_zero = numpy.count_nonzero(ipiv_np != numpy.arange(1, n + 1), axis=-1) - if dtype.kind == "f": - non_zero += numpy.count_nonzero(diag < 0, axis=-1) - - sign = (non_zero % 2) * -2 + 1 - if dtype.kind == "c": - sign = sign * numpy.prod(diag / numpy.abs(diag), axis=-1) - - sign = sign.astype(dtype) + sign = sign.astype(res_type) logdet = logdet.astype(logdet_dtype, copy=False) - singular = dev_info_np > 0 - res_sign = numpy.where(singular, dtype.type(0), sign).reshape(shape) - res_logdet = numpy.where(singular, logdet_dtype.type('-inf'), logdet).reshape(shape) - - res = res_sign * numpy.exp(res_logdet) - - return res - # return out_v - -""" + singular = dev_info > 0 + return ( + dpnp.where(singular, res_type.type(0), sign).reshape(shape), + dpnp.where(singular, logdet_dtype.type("-inf"), logdet).reshape(shape), + ) From 301fc2af0ffb17693dcc93bd0e110f947544b4e0 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 31 Oct 2023 11:35:35 +0100 Subject: [PATCH 07/55] Add a new test_norms.py file in cupy tests --- .../cupy/linalg_tests/test_norms.py | 53 ++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index d4e95a5641f0..71c8922a23ea 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -4,5 +4,56 @@ import pytest import dpnp as cupy -from tests.helper import has_support_aspect64 from tests.third_party.cupy import testing + + +class TestSlogdet(unittest.TestCase): + @testing.for_dtypes("fd") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_slogdet(self, xp, dtype): + a = testing.shaped_arange((2, 2), xp, dtype) + 1 + sign, logdet = xp.linalg.slogdet(a) + return sign, logdet + + @testing.for_dtypes("fd") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_slogdet_3(self, xp, dtype): + a = testing.shaped_arange((2, 2, 2), xp, dtype) + 1 + sign, logdet = xp.linalg.slogdet(a) + return sign, logdet + + @testing.for_dtypes("fd") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_slogdet_4(self, xp, dtype): + a = testing.shaped_arange((2, 2, 2, 2), xp, dtype) + 1 + sign, logdet = xp.linalg.slogdet(a) + return sign, logdet + + @testing.for_dtypes("fd") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_slogdet_singular(self, xp, dtype): + a = xp.zeros((3, 3), dtype=dtype) + sign, logdet = xp.linalg.slogdet(a) + return sign, logdet + + # @testing.for_dtypes("fd") + # @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + # def test_slogdet_singular_errstate(self, xp, dtype): + # a = xp.zeros((3, 3), dtype) + # with cupyx.errstate(linalg="raise"): + # # `cupy.linalg.slogdet` internally catches `dev_info < 0` from + # # cuSOLVER, which should not affect `dev_info > 0` cases. 
+ # sign, logdet = xp.linalg.slogdet(a) + # return sign, logdet + + @testing.for_dtypes("fd") + def test_slogdet_one_dim(self, dtype): + for xp in (numpy, cupy): + a = testing.shaped_arange((2,), xp, dtype) + if xp is numpy: + with pytest.raises(numpy.linalg.LinAlgError): + xp.linalg.slogdet(a) + else: + # Replace with dpnp.linalg.LinAlgError + with pytest.raises(ValueError): + xp.linalg.slogdet(a) From 9aba0146ed7e5ead9fa8587400e6fb80825eb0cf Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 31 Oct 2023 11:37:43 +0100 Subject: [PATCH 08/55] Expand test scope in public CI --- .github/workflows/conda-package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index a909d860b9a5..2c9598899b98 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -28,6 +28,7 @@ env: test_umath.py test_usm_type.py third_party/cupy/core_tests + third_party/cupy/linalg_tests/test_norms.py third_party/cupy/linalg_tests/test_product.py third_party/cupy/logic_tests/test_comparison.py third_party/cupy/logic_tests/test_truth.py From a8789e46e7985b149e76a67a99712b19c40ed38b Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 22 Nov 2023 20:32:54 +0100 Subject: [PATCH 09/55] A small update _lu_factor func --- dpnp/linalg/dpnp_utils_linalg.py | 35 ++++++++++++++------------------ 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 99e967a14aab..7e74884dda18 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -175,7 +175,7 @@ def _lu_factor(a, res_type): Args: a (dpnp.ndarray): The input matrix with dimension ``(..., N, N)``. The dimension condition is not checked. - dtype (dpnp.dtype): float32, float64, complex64, or complex128. + res_type (dpnp.dtype): float32, float64, complex64 or complex128. Returns: tuple: @@ -204,16 +204,11 @@ def _lu_factor(a, res_type): if m != n: raise ValueError("Last 2 dimensions of the input array must be square") - # orig_shape = a.shape - a_order = "C" if a.flags.c_contiguous else "F" + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type - exec_q = a.sycl_queue - if exec_q is None: - raise ValueError( - "Execution placement can not be unambiguously inferred " - "from input arguments." 
- ) + # TODO: use getrf_batch if a.ndim > 2: orig_shape = a.shape @@ -225,14 +220,14 @@ def _lu_factor(a, res_type): ipiv_h = dpnp.empty( (batch_size, n), dtype=dpnp.int64, - usm_type=a.usm_type, - sycl_queue=a.sycl_queue, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, ) dev_info_h = dpnp.empty( (batch_size,), dtype=dpnp.int64, - usm_type=a.usm_type, - sycl_queue=a.sycl_queue, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, ) a_ht_copy_ev = [None] * batch_size ht_lapack_ev = [None] * batch_size @@ -242,11 +237,11 @@ def _lu_factor(a, res_type): a_ht_copy_ev[i], a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=a_usm_arr[i], dst=a_vecs[i].get_array(), - sycl_queue=a.sycl_queue, + sycl_queue=a_sycl_queue, ) ht_lapack_ev[i] = li._getrf( - exec_q, + a_sycl_queue, n, a_vecs[i].get_array(), ipiv_h[i].get_array(), @@ -269,18 +264,18 @@ def _lu_factor(a, res_type): a_h = dpnp.empty_like(a, order="F", dtype=res_type) ipiv_h = dpnp.empty( - n, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue + n, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) dev_info_h = dpnp.empty( - 1, dtype=dpnp.int64, usm_type=a.usm_type, sycl_queue=a.sycl_queue + 1, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a.sycl_queue + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue ) lapack_ev = li._getrf( - exec_q, + a_sycl_queue, n, a_h.get_array(), ipiv_h.get_array(), @@ -295,7 +290,7 @@ def _lu_factor(a, res_type): ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( src=a_h.get_array(), dst=a_h_f.get_array(), - sycl_queue=a.sycl_queue, + sycl_queue=a_sycl_queue, depends=[lapack_ev], ) ht_copy_out_ev.wait() From 59642f6f5b12262282f1f5bb979f218328d6bf2c Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 23 Nov 2023 13:14:26 +0100 Subject: [PATCH 10/55] Remove w/a for dpnp.count_nonzero in slogdet --- dpnp/linalg/dpnp_iface_linalg.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 18d8f359ae42..25dcb3b03551 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -671,21 +671,10 @@ def slogdet(a): logdet = dpnp.log(dpnp.abs(diag)).sum(axis=-1) - # Transposing 'ipiv' and reshaping 'arange_values' for element-wise comparison - # as 'dpnp.count_nonzero' does not support the 'axis' parameter. - ipiv_transposed = ipiv.transpose(-1, *range(ipiv.ndim - 1)) - arange_shape = [1] * a.ndim - arange_shape[-2] = n - arange_values = dpnp.arange(1, n + 1, sycl_queue=exec_q).reshape( - arange_shape - ) - # ipiv is 1-origin - # TODO: Replace with 'dpnp.count_nonzero(ipiv != dpnp.arange(1, n + 1), axis=-1)' - # when supported. 
- non_zero = dpnp.count_nonzero(ipiv_transposed != arange_values) + non_zero = dpnp.count_nonzero(ipiv != dpnp.arange(1, n + 1), axis=-1) if res_type.kind == "f": - non_zero += dpnp.count_nonzero(diag < 0) + non_zero += dpnp.count_nonzero(diag < 0, axis=-1) sign = -(1 ** (non_zero % 2)) if res_type.kind == "c": From db45555a5a6c6cafefc04777f9b77a8ed034601f Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 23 Nov 2023 15:36:45 +0100 Subject: [PATCH 11/55] getrf returns pair of events and uses dpctl.utils.keep_args_alive --- dpnp/backend/extensions/lapack/getrf.cpp | 18 +++++++++++------- dpnp/backend/extensions/lapack/getrf.hpp | 13 +++++++------ dpnp/linalg/dpnp_utils_linalg.py | 6 +++--- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index d48aee69693d..b7d830f63c24 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -130,12 +130,13 @@ static sycl::event getrf_impl(sycl::queue exec_q, return getrf_event; } -sycl::event getrf(sycl::queue q, - const std::int64_t n, - dpctl::tensor::usm_ndarray a_array, - dpctl::tensor::usm_ndarray ipiv_array, - dpctl::tensor::usm_ndarray dev_info_array, - const std::vector &depends = {}) +std::pair + getrf(sycl::queue q, + const std::int64_t n, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + dpctl::tensor::usm_ndarray dev_info_array, + const std::vector &depends) { auto array_types = dpctl_td_ns::usm_ndarray_types(); @@ -165,7 +166,10 @@ sycl::event getrf(sycl::queue q, sycl::event getrf_ev = getrf_fn(q, n, a_array_data, lda, d_ipiv, d_dev_info, host_task_events, depends); - return getrf_ev; + sycl::event args_ev = dpctl::utils::keep_args_alive( + q, {a_array, ipiv_array, dev_info_array}, host_task_events); + + return std::make_pair(args_ev, getrf_ev); } template diff --git a/dpnp/backend/extensions/lapack/getrf.hpp b/dpnp/backend/extensions/lapack/getrf.hpp index 583f4bf2d1e8..2b5d7c18ce47 100644 --- a/dpnp/backend/extensions/lapack/getrf.hpp +++ b/dpnp/backend/extensions/lapack/getrf.hpp @@ -38,12 +38,13 @@ namespace ext { namespace lapack { -sycl::event getrf(sycl::queue exec_q, - const std::int64_t n, - dpctl::tensor::usm_ndarray a_array, - dpctl::tensor::usm_ndarray ipiv_array, - dpctl::tensor::usm_ndarray dev_info_array, - const std::vector &depends); +extern std::pair + getrf(sycl::queue exec_q, + const std::int64_t n, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + dpctl::tensor::usm_ndarray dev_info_array, + const std::vector &depends = {}); extern void init_getrf_dispatch_vector(void); } // namespace lapack diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 7e74884dda18..435263701086 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -240,7 +240,7 @@ def _lu_factor(a, res_type): sycl_queue=a_sycl_queue, ) - ht_lapack_ev[i] = li._getrf( + ht_lapack_ev[i], _ = li._getrf( a_sycl_queue, n, a_vecs[i].get_array(), @@ -274,7 +274,7 @@ def _lu_factor(a, res_type): src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue ) - lapack_ev = li._getrf( + ht_lapack_ev, lapack_ev = li._getrf( a_sycl_queue, n, a_h.get_array(), @@ -298,7 +298,7 @@ def _lu_factor(a, res_type): else: out_v = (a_h, ipiv_h, dev_info_h) - lapack_ev.wait() + ht_lapack_ev.wait() a_ht_copy_ev.wait() return out_v From 0350d86e4bbc28a779e2f98146c9cde2e918001f Mon Sep 17 00:00:00 2001 
From: Vladislav Perevezentsev Date: Mon, 27 Nov 2023 12:58:23 +0100 Subject: [PATCH 12/55] Update dpnp.linalg.det using slogdet --- dpnp/linalg/dpnp_iface_linalg.py | 57 ++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 25dcb3b03551..27bd09878926 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -143,34 +143,57 @@ def cond(input, p=None): return call_origin(numpy.linalg.cond, input, p) -def det(input): +def det(a): """ Compute the determinant of an array. + For full documentation refer to :obj:`numpy.linalg.det`. + Parameters ---------- - input : (..., M, M) array_like + a : (..., M, M) dpnp.ndarray Input array to compute determinants for. Returns ------- - det : (...) array_like - Determinant of `input`. - """ + det : (...) dpnp.ndarray + Determinant of `a`. + + Limitations + ----------- + Parameter `a` is supported as :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. + Input array data types are limited by supported DPNP :ref:`Data types`. + + See Also + -------- + :obj:`dpnp.linalg.slogdet` : Returns sign and logarithm of the determinant of an array. - # x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) - # if x1_desc: - # if x1_desc.ndim < 2: - # pass - # elif x1_desc.shape[-1] == x1_desc.shape[-2]: - # result_obj = dpnp_det(x1_desc).get_pyobj() - # result = dpnp.convert_single_elem_array_to_scalar(result_obj) + Examples + -------- + The determinant of a 2-D array [[a, b], [c, d]] is ad - bc: + + >>> import dpnp as dp + >>> a = dp.array([[1, 2], [3, 4]]) + >>> dp.linalg.det(a) + array(-2.) + + Computing determinants for a stack of matrices: - # return result + >>> a = dp.array([ [[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]] ]) + >>> a.shape + (3, 2, 2) + >>> dp.linalg.det(a) + array([-2., -3., -8.]) - # return call_origin(numpy.linalg.det, input) + """ + + if not dpnp.is_supported_array_type(a): + raise TypeError( + "An array must be any of supported type, but got {}".format(type(a)) + ) - return _lu_factor(input) + sign, logdet = slogdet(a) + return sign * dpnp.exp(logdet) def eig(x1): @@ -652,8 +675,8 @@ def slogdet(a): if a.size == 0: # empty batch (result is empty, too) or empty matrices det([[]]) == 1 - sign = dpnp.ones(shape, res_type) - logdet = dpnp.zeros(shape, logdet_dtype) + sign = dpnp.ones(shape, dtype=res_type) + logdet = dpnp.zeros(shape, dtype=logdet_dtype) return sign, logdet lu, ipiv, dev_info = _lu_factor(a, res_type) From 6c20debd83f12e469461887b170e635162652da9 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 27 Nov 2023 13:00:27 +0100 Subject: [PATCH 13/55] Add new cupy tests for dpnp.linalg.det --- tests/test_linalg.py | 16 ++-- .../cupy/linalg_tests/test_norms.py | 76 +++++++++++++++++++ 2 files changed, 84 insertions(+), 8 deletions(-) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 89361795a6e2..2a78c3147f17 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -106,18 +106,18 @@ def test_cond(arr, p): [[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]], - # [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], - # [ - # [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], - # [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], - # ], + [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], + [ + [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], + [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], + ], ], ids=[ "[[0, 0], [0, 0]]", "[[1, 2], [1, 2]]", "[[1, 2], [3, 4]]", 
- # "[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]", - # "[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]", + "[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]", + "[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]", ], ) def test_det(array): @@ -128,7 +128,7 @@ def test_det(array): assert_allclose(expected, result) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") +# @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_det_empty(): a = numpy.empty((0, 0, 2, 2), dtype=numpy.float32) ia = inp.array(a) diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index 71c8922a23ea..e23409d877f0 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -7,6 +7,82 @@ from tests.third_party.cupy import testing +class TestDet(unittest.TestCase): + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det(self, xp, dtype): + a = testing.shaped_arange((2, 2), xp, dtype) + 1 + return xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det_3(self, xp, dtype): + a = testing.shaped_arange((2, 2, 2), xp, dtype) + 1 + return xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det_4(self, xp, dtype): + a = testing.shaped_arange((2, 2, 2, 2), xp, dtype) + 1 + return xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det_empty_batch(self, xp, dtype): + a = xp.empty((2, 0, 3, 3), dtype) + return xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det_empty_matrix(self, xp, dtype): + a = xp.empty((0, 0), dtype) + return xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det_empty_matrices(self, xp, dtype): + a = xp.empty((2, 3, 0, 0), dtype) + return xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + def test_det_different_last_two_dims(self, dtype): + for xp in (numpy, cupy): + a = testing.shaped_arange((2, 3, 2), xp, dtype) + # TODO: replace ValueError with dpnp.linalg.LinAlgError + with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + def test_det_different_last_two_dims_empty_batch(self, dtype): + for xp in (numpy, cupy): + a = xp.empty((0, 3, 2), dtype=dtype) + # TODO: replace ValueError with dpnp.linalg.LinAlgError + with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + def test_det_one_dim(self, dtype): + for xp in (numpy, cupy): + a = testing.shaped_arange((2,), xp, dtype) + # TODO: replace ValueError with dpnp.linalg.LinAlgError + with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + def test_det_zero_dim(self, dtype): + for xp in (numpy, cupy): + a = testing.shaped_arange((), xp, dtype) + # TODO: replace ValueError with dpnp.linalg.LinAlgError + with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + xp.linalg.det(a) + + @testing.for_float_dtypes(no_float16=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def 
test_det_singular(self, xp, dtype): + a = xp.zeros((2, 3, 3), dtype=dtype) + return xp.linalg.det(a) + + class TestSlogdet(unittest.TestCase): @testing.for_dtypes("fd") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) From 3359c05cb413af7ed039f9456a7394bfdc596b0e Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 27 Nov 2023 16:13:30 +0100 Subject: [PATCH 14/55] Add ipiv_vecs and dev_info_vecs in _lu_factor for the batch case --- dpnp/linalg/dpnp_iface_linalg.py | 8 ++++- dpnp/linalg/dpnp_utils_linalg.py | 35 ++++++++++--------- .../cupy/linalg_tests/test_norms.py | 5 +++ 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 27bd09878926..e764a870b993 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -695,7 +695,13 @@ def slogdet(a): logdet = dpnp.log(dpnp.abs(diag)).sum(axis=-1) # ipiv is 1-origin - non_zero = dpnp.count_nonzero(ipiv != dpnp.arange(1, n + 1), axis=-1) + non_zero = dpnp.count_nonzero( + ipiv + != dpnp.arange( + 1, n + 1, usm_type=ipiv.usm_type, sycl_queue=ipiv.sycl_queue + ), + axis=-1, + ) if res_type.kind == "f": non_zero += dpnp.count_nonzero(diag < 0, axis=-1) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 435263701086..1e2081802928 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -217,18 +217,9 @@ def _lu_factor(a, res_type): a_usm_arr = dpnp.get_usm_ndarray(a) a_vecs = [None] * batch_size - ipiv_h = dpnp.empty( - (batch_size, n), - dtype=dpnp.int64, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - dev_info_h = dpnp.empty( - (batch_size,), - dtype=dpnp.int64, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) + ipiv_vecs = [None] * batch_size + dev_info_vecs = [None] * batch_size + a_ht_copy_ev = [None] * batch_size ht_lapack_ev = [None] * batch_size @@ -239,13 +230,25 @@ def _lu_factor(a, res_type): dst=a_vecs[i].get_array(), sycl_queue=a_sycl_queue, ) + ipiv_vecs[i] = dpnp.empty( + (n,), + dtype=dpnp.int64, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + dev_info_vecs[i] = dpnp.empty( + (1,), + dtype=dpnp.int64, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) ht_lapack_ev[i], _ = li._getrf( a_sycl_queue, n, a_vecs[i].get_array(), - ipiv_h[i].get_array(), - dev_info_h[i].get_array(), + ipiv_vecs[i].get_array(), + dev_info_vecs[i].get_array(), [a_copy_ev], ) @@ -254,8 +257,8 @@ def _lu_factor(a, res_type): a_ht_copy_ev[i].wait() out_v = dpnp.array(a_vecs, order=a_order).reshape(orig_shape) - out_ipiv = ipiv_h.reshape(orig_shape[:-1]) - out_dev_info = dev_info_h.reshape(orig_shape[:-2]) + out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) + out_dev_info = dpnp.array(dev_info_vecs).reshape(orig_shape[:-2]) return (out_v, out_ipiv, out_dev_info) diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index e23409d877f0..4c0449019a14 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -4,6 +4,7 @@ import pytest import dpnp as cupy +from tests.helper import is_cpu_device from tests.third_party.cupy import testing @@ -105,6 +106,10 @@ def test_slogdet_4(self, xp, dtype): sign, logdet = xp.linalg.slogdet(a) return sign, logdet + # TODO: remove it + @pytest.mark.skipif( + is_cpu_device(), reason="Need to add a logic for this case" + ) @testing.for_dtypes("fd") @testing.numpy_cupy_allclose(rtol=1e-3, 
atol=1e-4) def test_slogdet_singular(self, xp, dtype): From 1a913856a3f7a7e21174b183a7719d2459d1c1e9 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 27 Nov 2023 16:25:02 +0100 Subject: [PATCH 15/55] Skip test_det on CPU due to bug in MKL --- tests/test_linalg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 2a78c3147f17..1631f22e1364 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -5,7 +5,7 @@ import dpnp as inp -from .helper import get_all_dtypes, get_complex_dtypes, has_support_aspect64 +from .helper import get_all_dtypes, has_support_aspect64, is_cpu_device def vvsort(val, vec, size, xp): @@ -100,6 +100,7 @@ def test_cond(arr, p): assert_array_equal(expected, result) +@pytest.mark.skipif(is_cpu_device(), reason="MKL bug MKLD-16626") @pytest.mark.parametrize( "array", [ @@ -128,7 +129,6 @@ def test_det(array): assert_allclose(expected, result) -# @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_det_empty(): a = numpy.empty((0, 0, 2, 2), dtype=numpy.float32) ia = inp.array(a) From b8607909044529f494c92cd655dc897ee4dd608f Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 28 Nov 2023 12:25:16 +0100 Subject: [PATCH 16/55] Small update of cupy tests in test_norms.py --- tests/third_party/cupy/linalg_tests/test_norms.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index 4c0449019a14..ea6f3ec3941e 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -30,19 +30,19 @@ def test_det_4(self, xp, dtype): @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_empty_batch(self, xp, dtype): - a = xp.empty((2, 0, 3, 3), dtype) + a = xp.empty((2, 0, 3, 3), dtype=dtype) return xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_empty_matrix(self, xp, dtype): - a = xp.empty((0, 0), dtype) + a = xp.empty((0, 0), dtype=dtype) return xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_empty_matrices(self, xp, dtype): - a = xp.empty((2, 3, 0, 0), dtype) + a = xp.empty((2, 3, 0, 0), dtype=dtype) return xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) @@ -77,6 +77,8 @@ def test_det_zero_dim(self, dtype): with pytest.raises((numpy.linalg.LinAlgError, ValueError)): xp.linalg.det(a) + # TODO: remove skipif when MKLD-16626 is resolved + @pytest.mark.skipif(is_cpu_device(), reason="MKL bug MKLD-16626") @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_singular(self, xp, dtype): @@ -106,10 +108,8 @@ def test_slogdet_4(self, xp, dtype): sign, logdet = xp.linalg.slogdet(a) return sign, logdet - # TODO: remove it - @pytest.mark.skipif( - is_cpu_device(), reason="Need to add a logic for this case" - ) + # TODO: remove skipif when MKLD-16626 is resolved + @pytest.mark.skipif(is_cpu_device(), reason="MKL bug MKLD-16626") @testing.for_dtypes("fd") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet_singular(self, xp, dtype): From 37d476bcb654ccc5c772ccd4a9434687b75e41a5 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 29 Nov 2023 15:29:22 +0100 Subject: [PATCH 17/55] Add support of complex dtype 
for dpnp.diagonal and update test_diagonal --- dpnp/backend/kernels/dpnp_krnl_indexing.cpp | 4 ++++ tests/test_indexing.py | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp index 2cca84f9e61f..eceadd7d9650 100644 --- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp @@ -950,6 +950,10 @@ void func_map_init_indexing_func(func_map_t &fmap) eft_FLT, (void *)dpnp_diagonal_ext_c}; fmap[DPNPFuncName::DPNP_FN_DIAGONAL_EXT][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_diagonal_ext_c}; + fmap[DPNPFuncName::DPNP_FN_DIAGONAL_EXT][eft_C64][eft_C64] = { + eft_C64, (void *)dpnp_diagonal_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_DIAGONAL_EXT][eft_C128][eft_C128] = { + eft_C128, (void *)dpnp_diagonal_ext_c>}; fmap[DPNPFuncName::DPNP_FN_FILL_DIAGONAL][eft_INT][eft_INT] = { eft_INT, (void *)dpnp_fill_diagonal_default_c}; diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 03541dc2d55d..33d3746f19b1 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -77,6 +77,7 @@ def test_choose(): assert_array_equal(expected, result) +@pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_bool=True)) @pytest.mark.parametrize("offset", [0, 1], ids=["0", "1"]) @pytest.mark.parametrize( "array", @@ -112,8 +113,8 @@ def test_choose(): "[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], [[[13, 14, 15], [16, 17, 18]], [[19, 20, 21], [22, 23, 24]]]]", ], ) -def test_diagonal(array, offset): - a = numpy.array(array) +def test_diagonal(array, arr_dtype, offset): + a = numpy.array(array, dtype=arr_dtype) ia = dpnp.array(a) expected = numpy.diagonal(a, offset) result = dpnp.diagonal(ia, offset) From 2e8b4fe35a6a70a2a5a83436a2e76b5c80a0fc37 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 29 Nov 2023 16:05:34 +0100 Subject: [PATCH 18/55] lu_factor func returns the result of LU decomposition as c-contiguous and add explanatory comments --- dpnp/backend/extensions/lapack/getrf.cpp | 12 +++---- dpnp/linalg/dpnp_utils_linalg.py | 42 +++++++++++------------- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index b7d830f63c24..a83d94ac5069 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -84,13 +84,13 @@ static sycl::event getrf_impl(sycl::queue exec_q, getrf_event = oneapi::mkl::lapack::getrf( exec_q, - n, // The order of the matrix A; (0 ≤ n). - n, // The order of the matrix A; (0 ≤ n). - a, // Pointer to the n-by-n coefficient matrix A. - lda, // The leading dimension of a. - ipiv, + n, // Order of the square matrix; (0 ≤ n). + n, // Order of the square matrix; (0 ≤ n). + a, // Pointer to the n-by-n matrix. + lda, // The leading dimension of `a`. + ipiv, // Pointer to the array of pivot indices. scratchpad, // Pointer to scratchpad memory to be used by MKL - // routine for storing intermediate results + // routine for storing intermediate results. 
scratchpad_size, depends); } catch (mkl_lapack::exception const &e) { error_msg diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 1e2081802928..1139ea5ab121 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -204,7 +204,6 @@ def _lu_factor(a, res_type): if m != n: raise ValueError("Last 2 dimensions of the input array must be square") - a_order = "C" if a.flags.c_contiguous else "F" a_sycl_queue = a.sycl_queue a_usm_type = a.usm_type @@ -212,19 +211,22 @@ def _lu_factor(a, res_type): if a.ndim > 2: orig_shape = a.shape + # get 3d input arrays by reshape a = a.reshape(-1, n, n) batch_size = a.shape[0] a_usm_arr = dpnp.get_usm_ndarray(a) + # Initialize lists for storing arrays and events for each batch a_vecs = [None] * batch_size ipiv_vecs = [None] * batch_size dev_info_vecs = [None] * batch_size - a_ht_copy_ev = [None] * batch_size ht_lapack_ev = [None] * batch_size + # Process each batch for i in range(batch_size): - a_vecs[i] = dpnp.empty_like(a[i], order="F", dtype=res_type) + # Copy each 2D slice to a new array as getrf destroys the input matrix + a_vecs[i] = dpnp.empty_like(a[i], order="C", dtype=res_type) a_ht_copy_ev[i], a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=a_usm_arr[i], dst=a_vecs[i].get_array(), @@ -243,6 +245,8 @@ def _lu_factor(a, res_type): sycl_queue=a_sycl_queue, ) + # Call the LAPACK extension function _getrf + # to perform LU decomposition on each batch in 'a_vecs[i]' ht_lapack_ev[i], _ = li._getrf( a_sycl_queue, n, @@ -256,7 +260,8 @@ def _lu_factor(a, res_type): ht_lapack_ev[i].wait() a_ht_copy_ev[i].wait() - out_v = dpnp.array(a_vecs, order=a_order).reshape(orig_shape) + # Reshape the results back to their original shape + out_v = dpnp.array(a_vecs, order="C").reshape(orig_shape) out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) out_dev_info = dpnp.array(dev_info_vecs).reshape(orig_shape[:-2]) @@ -265,7 +270,8 @@ def _lu_factor(a, res_type): else: a_usm_arr = dpnp.get_usm_ndarray(a) - a_h = dpnp.empty_like(a, order="F", dtype=res_type) + # `a`` must be copied because getrf destroys the input matrix + a_h = dpnp.empty_like(a, order="C", dtype=res_type) ipiv_h = dpnp.empty( n, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) @@ -273,11 +279,15 @@ def _lu_factor(a, res_type): 1, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) + # use DPCTL tensor function to fill the сopy of the input array + # from the input array a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue ) - ht_lapack_ev, lapack_ev = li._getrf( + # Call the LAPACK extension function _getrf + # to perform LU decomposition on the input matrix + ht_lapack_ev, _ = li._getrf( a_sycl_queue, n, a_h.get_array(), @@ -286,22 +296,10 @@ def _lu_factor(a, res_type): [a_copy_ev], ) - if a_order != "F": - # need to align order of the result of solutions with the - # input array of multiple dependent variables - a_h_f = dpnp.empty_like(a_h, order=a_order) - ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_h.get_array(), - dst=a_h_f.get_array(), - sycl_queue=a_sycl_queue, - depends=[lapack_ev], - ) - ht_copy_out_ev.wait() - out_v = (a_h_f, ipiv_h, dev_info_h) - else: - out_v = (a_h, ipiv_h, dev_info_h) - ht_lapack_ev.wait() a_ht_copy_ev.wait() - return out_v + # Return a tuple containing the factorized matrix 'a_h', + # pivot indices 'ipiv_h' + # and the status 'dev_info_h' from the LAPACK getrf call + 
return (a_h, ipiv_h, dev_info_h) From 3849d92189b3c5587a2e204a0a6c7aa52e0529af Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 29 Nov 2023 18:40:19 +0100 Subject: [PATCH 19/55] Add getrf_batch MKL extension --- dpnp/backend/extensions/lapack/CMakeLists.txt | 1 + dpnp/backend/extensions/lapack/getrf.hpp | 12 + .../backend/extensions/lapack/getrf_batch.cpp | 220 ++++++++++++++++++ dpnp/backend/extensions/lapack/lapack_py.cpp | 11 +- .../extensions/lapack/types_matrix.hpp | 28 ++- dpnp/linalg/dpnp_utils_linalg.py | 119 +++++++--- 6 files changed, 353 insertions(+), 38 deletions(-) create mode 100644 dpnp/backend/extensions/lapack/getrf_batch.cpp diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt index a4ff161d0edc..fa6085895088 100644 --- a/dpnp/backend/extensions/lapack/CMakeLists.txt +++ b/dpnp/backend/extensions/lapack/CMakeLists.txt @@ -28,6 +28,7 @@ set(python_module_name _lapack_impl) set(_module_src ${CMAKE_CURRENT_SOURCE_DIR}/lapack_py.cpp ${CMAKE_CURRENT_SOURCE_DIR}/getrf.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/getrf_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/heevd.cpp ${CMAKE_CURRENT_SOURCE_DIR}/syevd.cpp ) diff --git a/dpnp/backend/extensions/lapack/getrf.hpp b/dpnp/backend/extensions/lapack/getrf.hpp index 2b5d7c18ce47..e66edefc52cd 100644 --- a/dpnp/backend/extensions/lapack/getrf.hpp +++ b/dpnp/backend/extensions/lapack/getrf.hpp @@ -46,7 +46,19 @@ extern std::pair dpctl::tensor::usm_ndarray dev_info_array, const std::vector &depends = {}); +extern std::pair + getrf_batch(sycl::queue exec_q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + dpctl::tensor::usm_ndarray dev_info_array, + std::int64_t n, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size, + const std::vector &depends = {}); + extern void init_getrf_dispatch_vector(void); +extern void init_getrf_batch_dispatch_vector(void); } // namespace lapack } // namespace ext } // namespace backend diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp new file mode 100644 index 000000000000..b552358965d1 --- /dev/null +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -0,0 +1,220 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "getrf.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*getrf_batch_impl_fn_ptr_t)( + sycl::queue, + std::int64_t, + char *, + std::int64_t, + std::int64_t, + std::int64_t *, + std::int64_t, + std::int64_t, + std::int64_t *, + std::vector &, + const std::vector &); + +static getrf_batch_impl_fn_ptr_t + getrf_batch_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event getrf_batch_impl(sycl::queue exec_q, + std::int64_t n, + char *in_a, + std::int64_t lda, + std::int64_t stride_a, + std::int64_t *ipiv, + std::int64_t stride_ipiv, + std::int64_t batch_size, + std::int64_t *dev_info, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + + const std::int64_t scratchpad_size = + oneapi::mkl::lapack::getrf_batch_scratchpad_size( + exec_q, n, n, lda, stride_a, stride_ipiv, batch_size); + T *scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + + sycl::event getrf_batch_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + getrf_batch_event = oneapi::mkl::lapack::getrf_batch( + exec_q, + n, // Order of each square matrix in the batch; (0 ≤ n). + n, // Order of each square matrix in the batch; (0 ≤ n). + a, // Pointer to the batch of matrices. + lda, // The leading dimension of `a`. + stride_a, // Stride between matrices: Element spacing between + // matrices in `a`. + ipiv, // Pivot indices: Pointer to the pivot indices for each + // matrix. + stride_ipiv, // Stride between pivot indices: Spacing between pivot + // arrays in 'ipiv'. + batch_size, // Total number of matrices in the batch. + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results. 
+ scratchpad_size, depends); + } catch (mkl_lapack::exception const &e) { + error_msg + << "Unexpected MKL exception caught during getrf() call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + info = e.info(); + } catch (sycl::exception const &e) { + error_msg << "Unexpected SYCL exception caught during getrf() call:\n" + << e.what(); + info = -1; + } + + if (info != 0) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + + if (info < 0) { + throw std::runtime_error(error_msg.str()); + } + } + + sycl::event write_info_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(getrf_batch_event); + cgh.single_task([=]() { dev_info[0] = info; }); + }); + + host_task_events.push_back(write_info_event); + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(getrf_batch_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + return getrf_batch_event; +} + +std::pair + getrf_batch(sycl::queue q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + dpctl::tensor::usm_ndarray dev_info_array, + std::int64_t n, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size, + const std::vector &depends) +{ + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + + getrf_batch_impl_fn_ptr_t getrf_batch_fn = + getrf_batch_dispatch_vector[a_array_type_id]; + if (getrf_batch_fn == nullptr) { + throw py::value_error( + "No getrf_batch implementation defined for the provided type " + "of the input matrix."); + } + + char *a_array_data = a_array.get_data(); + const std::int64_t lda = std::max(1UL, n); + + // check valid ipiv + + char *ipiv_array_data = ipiv_array.get_data(); + std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data); + + char *dev_info_array_data = dev_info_array.get_data(); + std::int64_t *d_dev_info = + reinterpret_cast(dev_info_array_data); + + std::vector host_task_events; + sycl::event getrf_batch_ev = + getrf_batch_fn(q, n, a_array_data, lda, stride_a, d_ipiv, stride_ipiv, + batch_size, d_dev_info, host_task_events, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive( + q, {a_array, ipiv_array, dev_info_array}, host_task_events); + + return std::make_pair(args_ev, getrf_batch_ev); +} + +template +struct GetrfBatchContigFactory +{ + fnT get() + { + if constexpr (types::GetrfBatchTypePairSupportFactory::is_defined) { + return getrf_batch_impl; + } + else { + return nullptr; + } + } +}; + +void init_getrf_batch_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(getrf_batch_dispatch_vector); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp index 38320d205aa3..c504f9dbd9f8 100644 --- a/dpnp/backend/extensions/lapack/lapack_py.cpp +++ b/dpnp/backend/extensions/lapack/lapack_py.cpp @@ -40,8 +40,9 @@ namespace py = pybind11; // populate dispatch vectors void init_dispatch_vectors(void) { - lapack_ext::init_syevd_dispatch_vector(); + lapack_ext::init_getrf_batch_dispatch_vector(); lapack_ext::init_getrf_dispatch_vector(); + lapack_ext::init_syevd_dispatch_vector(); } // populate dispatch tables @@ -75,4 +76,12 @@ PYBIND11_MODULE(_lapack_impl, m) 
py::arg("sycl_queue"), py::arg("n"), py::arg("a_array"), py::arg("ipiv_array"), py::arg("dev_info_array"), py::arg("depends") = py::list()); + + m.def("_getrf_batch", &lapack_ext::getrf_batch, + "Call `getrf_batch` from OneMKL LAPACK library to return " + "the LU factorization of a batch of general n x n matrices", + py::arg("sycl_queue"), py::arg("a_array"), py::arg("ipiv_array"), + py::arg("dev_info_array"), py::arg("n"), py::arg("stride_a"), + py::arg("stride_ipiv"), py::arg("batch_size"), + py::arg("depends") = py::list()); } diff --git a/dpnp/backend/extensions/lapack/types_matrix.hpp b/dpnp/backend/extensions/lapack/types_matrix.hpp index b5276e5f1318..8216a61f842d 100644 --- a/dpnp/backend/extensions/lapack/types_matrix.hpp +++ b/dpnp/backend/extensions/lapack/types_matrix.hpp @@ -86,7 +86,7 @@ struct SyevdTypePairSupportFactory * MKL LAPACK library provides support in oneapi::mkl::lapack::getrf * function. * - * @tparam T Type of array containing input matrix A, + * @tparam T Type of array containing input matrix, * as well as the output array for storing the LU factorization. */ template @@ -106,6 +106,32 @@ struct GetrfTypePairSupportFactory // fall-through dpctl_td_ns::NotDefinedEntry>::is_defined; }; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::getrf_batch + * function. + * + * @tparam T Type of array containing input matrix, + * as well as the output array for storing the LU factorization. + */ +template +struct GetrfBatchTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; } // namespace types } // namespace lapack } // namespace ext diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 1139ea5ab121..27a00c7de1af 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -207,7 +207,8 @@ def _lu_factor(a, res_type): a_sycl_queue = a.sycl_queue a_usm_type = a.usm_type - # TODO: use getrf_batch + # TODO: Find out at which array sizes the best performance is obtained + use_batch = True if a.ndim > 2: orig_shape = a.shape @@ -216,61 +217,107 @@ def _lu_factor(a, res_type): batch_size = a.shape[0] a_usm_arr = dpnp.get_usm_ndarray(a) - # Initialize lists for storing arrays and events for each batch - a_vecs = [None] * batch_size - ipiv_vecs = [None] * batch_size - dev_info_vecs = [None] * batch_size - a_ht_copy_ev = [None] * batch_size - ht_lapack_ev = [None] * batch_size - - # Process each batch - for i in range(batch_size): - # Copy each 2D slice to a new array as getrf destroys the input matrix - a_vecs[i] = dpnp.empty_like(a[i], order="C", dtype=res_type) - a_ht_copy_ev[i], a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr[i], - dst=a_vecs[i].get_array(), - sycl_queue=a_sycl_queue, - ) - ipiv_vecs[i] = dpnp.empty( - (n,), + if use_batch: + # `a` must be copied because getrf_batch destroys the input matrix + a_h = dpnp.empty_like(a, order="C", dtype=res_type) + ipiv_h = dpnp.empty( + (batch_size, n), dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - dev_info_vecs[i] = dpnp.empty( - (1,), + dev_info_h = dpnp.empty( + 1, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - # Call the LAPACK 
extension function _getrf - # to perform LU decomposition on each batch in 'a_vecs[i]' - ht_lapack_ev[i], _ = li._getrf( + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue + ) + + ipiv_stride = n + a_stride = a_h.strides[0] + + # Call the LAPACK extension function _getrf_batch + # to perform LU decomposition of a batch of general matrices + ht_lapack_ev, _ = li._getrf_batch( a_sycl_queue, + a_h.get_array(), + ipiv_h.get_array(), + dev_info_h.get_array(), n, - a_vecs[i].get_array(), - ipiv_vecs[i].get_array(), - dev_info_vecs[i].get_array(), + a_stride, + ipiv_stride, + batch_size, [a_copy_ev], ) - for i in range(batch_size): - ht_lapack_ev[i].wait() - a_ht_copy_ev[i].wait() + ht_lapack_ev.wait() + a_ht_copy_ev.wait() - # Reshape the results back to their original shape - out_v = dpnp.array(a_vecs, order="C").reshape(orig_shape) - out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) - out_dev_info = dpnp.array(dev_info_vecs).reshape(orig_shape[:-2]) + return (a_h, ipiv_h, dev_info_h) - return (out_v, out_ipiv, out_dev_info) + else: + # Initialize lists for storing arrays and events for each batch + a_vecs = [None] * batch_size + ipiv_vecs = [None] * batch_size + dev_info_vecs = [None] * batch_size + a_ht_copy_ev = [None] * batch_size + ht_lapack_ev = [None] * batch_size + + # Process each batch + for i in range(batch_size): + # Copy each 2D slice to a new array as getrf destroys the input matrix + a_vecs[i] = dpnp.empty_like(a[i], order="C", dtype=res_type) + ( + a_ht_copy_ev[i], + a_copy_ev, + ) = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr[i], + dst=a_vecs[i].get_array(), + sycl_queue=a_sycl_queue, + ) + ipiv_vecs[i] = dpnp.empty( + (n,), + dtype=dpnp.int64, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + dev_info_vecs[i] = dpnp.empty( + (1,), + dtype=dpnp.int64, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + + # Call the LAPACK extension function _getrf + # to perform LU decomposition on each batch in 'a_vecs[i]' + ht_lapack_ev[i], _ = li._getrf( + a_sycl_queue, + n, + a_vecs[i].get_array(), + ipiv_vecs[i].get_array(), + dev_info_vecs[i].get_array(), + [a_copy_ev], + ) + + for i in range(batch_size): + ht_lapack_ev[i].wait() + a_ht_copy_ev[i].wait() + + # Reshape the results back to their original shape + out_v = dpnp.array(a_vecs, order="C").reshape(orig_shape) + out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) + out_dev_info = dpnp.array(dev_info_vecs).reshape(orig_shape[:-2]) + + return (out_v, out_ipiv, out_dev_info) else: a_usm_arr = dpnp.get_usm_ndarray(a) - # `a`` must be copied because getrf destroys the input matrix + # `a` must be copied because getrf destroys the input matrix a_h = dpnp.empty_like(a, order="C", dtype=res_type) ipiv_h = dpnp.empty( n, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue From cd282a38a7ead6478ed54b39291fcc3af7cb7f70 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 30 Nov 2023 10:56:42 +0100 Subject: [PATCH 20/55] Update docstring for slogdet --- dpnp/linalg/dpnp_iface_linalg.py | 83 +++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 24 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index e764a870b993..cbc742d72440 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -606,38 +606,69 @@ def svd(x1, full_matrices=True, compute_uv=True, hermitian=False): def slogdet(a): - """Returns sign and logarithm of the 
determinant of an array. + """ + Returns sign and logarithm of the determinant of an array. It calculates the natural logarithm of the determinant of a given value. - Args: + For full documentation refer to :obj:`numpy.linalg.slogdet`. + + Args + ---- a (dpnp.ndarray): The input matrix with dimension ``(..., N, N)``. - Returns: - tuple of :class:`~dpnp.ndarray`: - It returns a tuple ``(sign, logdet)``. ``sign`` represents each - sign of the determinant as a real number ``0``, ``1`` or ``-1``. - 'logdet' represents the natural logarithm of the absolute of the + Returns + ------- + out : tuple of :class:`~dpnp.ndarray`: + It returns a tuple ``(sign, logdet)``. + ``sign`` represents each sign of the determinant as a real number + ``0``, ``1`` or ``-1``. + ``logdet`` represents the natural logarithm of the absolute of the determinant. If the determinant is zero, ``sign`` will be ``0`` and ``logdet`` will be ``-inf``. The shapes of both ``sign`` and ``logdet`` are equal to ``a.shape[:-2]``. - .. warning:: - This function calls one or more cuSOLVER routine(s) which may yield - invalid results if input conditions are not met. - To detect these invalid results, you can set the `linalg` - configuration to a value that is not `ignore` in - :func:`cupyx.errstate` or :func:`cupyx.seterr`. + Limitations + ----------- + Parameter `a` is supported as :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. + Input array data types are limited by supported DPNP :ref:`Data types`. + + See Also + -------- + :obj:`dpnp.det` : Returns the determinant of an array. + + Examples + -------- + The determinant of a 2-D array [[a, b], [c, d]] is ad - bc: + + >>> import dpnp as dp + >>> a = dp.array([[1, 2], [3, 4]]) + >>> (sign, logabsdet) = dp.linalg.slogdet(a) + >>> (sign, logabsdet) + (array(-1.), array(0.69314718)) + >>> sign * dp.exp(logabsdet) + array(-2.) + + Computing log-determinants for a stack of matrices: - .. warning:: - To produce the same results as :func:`numpy.linalg.slogdet` for - singular inputs, set the `linalg` configuration to `raise`. + >>> a = dp.array([ [[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]] ]) + >>> a.shape + (3, 2, 2) + >>> sign, logabsdet = dp.linalg.slogdet(a) + >>> (sign, logabsdet) + (array([-1., -1., -1.]), array([0.69314718, 1.09861229, 2.07944154])) + >>> sign * dp.exp(logabsdet) + array([-2., -3., -8.]) - .. seealso:: :func:`numpy.linalg.slogdet` """ + if not dpnp.is_supported_array_type(a): + raise TypeError( + "An array must be any of supported type, but got {}".format(type(a)) + ) + # TODO: use dpnp.linalg.LinAlgError if a.ndim < 2: raise ValueError( @@ -651,6 +682,8 @@ def slogdet(a): raise ValueError("Last 2 dimensions of the input array must be square") exec_q = a.sycl_queue + a_usm_type = a.usm_type + a_sycl_queue = a.sycl_queue # dtype, sign_dtype = _util.linalg_common_type(a) # TODO: Use linalg_common_type from #1598 if dpnp.issubdtype(a.dtype, dpnp.floating): @@ -675,17 +708,19 @@ def slogdet(a): if a.size == 0: # empty batch (result is empty, too) or empty matrices det([[]]) == 1 - sign = dpnp.ones(shape, dtype=res_type) - logdet = dpnp.zeros(shape, dtype=logdet_dtype) + sign = dpnp.ones( + shape, dtype=res_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + logdet = dpnp.zeros( + shape, + dtype=logdet_dtype, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) return sign, logdet lu, ipiv, dev_info = _lu_factor(a, res_type) - # dev_info < 0 means illegal value (in dimensions, strides, and etc.) 
that - # should never happen even if the matrix contains nan or inf. - # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for - # debugging purposes. - # Transposing 'lu' to swap the last two axes for compatibility # with 'dpnp.diagonal' as it does not support 'axis1' and 'axis2' arguments. # TODO: Replace with 'dpnp.diagonal(lu, axis1=-2, axis2=-1)' when supported. From 771605f018430dca77908d2423ba694bf6614127 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 30 Nov 2023 10:59:44 +0100 Subject: [PATCH 21/55] Add more tests --- tests/test_linalg.py | 96 +++++++++++++++++++++++++++++++++++++++- tests/test_sycl_queue.py | 43 ++++++++++++++++++ tests/test_usm_type.py | 31 +++++++++++++ 3 files changed, 169 insertions(+), 1 deletion(-) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 1631f22e1364..e1ce837a6cd2 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -1,7 +1,7 @@ import dpctl import numpy import pytest -from numpy.testing import assert_allclose, assert_array_equal +from numpy.testing import assert_allclose, assert_array_equal, assert_raises import dpnp as inp @@ -508,3 +508,97 @@ def test_svd(type, shape): assert_allclose( inp.asnumpy(dpnp_vt)[i, :], np_vt[i, :], rtol=tol, atol=tol ) + + +class TestSlogdet: + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_slogdet_2d(self, dtype): + a_np = numpy.array([[1, 2], [3, 4]], dtype=dtype) + a_dp = inp.array(a_np) + + sign_expected, logdet_expected = numpy.linalg.slogdet(a_np) + sign_result, logdet_result = inp.linalg.slogdet(a_dp) + + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_slogdet_3d(self, dtype): + a_np = numpy.array( + [ + [[1, 2], [3, 4]], + [[1, 2], [2, 1]], + [[1, 3], [3, 1]], + ], + dtype=dtype, + ) + a_dp = inp.array(a_np) + + sign_expected, logdet_expected = numpy.linalg.slogdet(a_np) + sign_result, logdet_result = inp.linalg.slogdet(a_dp) + + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + + def test_slogdet_strides(self): + a_np = numpy.array( + [ + [2, 3, 1, 4, 5], + [5, 6, 7, 8, 9], + [9, 7, 7, 2, 3], + [1, 4, 5, 1, 8], + [8, 9, 8, 5, 3], + ] + ) + + a_dp = inp.array(a_np) + + # positive strides + sign_expected, logdet_expected = numpy.linalg.slogdet(a_np[::2, ::2]) + sign_result, logdet_result = inp.linalg.slogdet(a_dp[::2, ::2]) + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + + # negative strides + sign_expected, logdet_expected = numpy.linalg.slogdet(a_np[::-2, ::-2]) + sign_result, logdet_result = inp.linalg.slogdet(a_dp[::-2, ::-2]) + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + + # TODO: remove skipif when MKLD-16626 is resolved + @pytest.mark.skipif(is_cpu_device(), reason="MKL bug MKLD-16626") + @pytest.mark.parametrize( + "matrix", + [ + [[1, 2], [2, 4]], + [[0, 0], [0, 0]], + [[1, 1], [1, 1]], + [[2, 4], [1, 2]], + [[1, 2], [0, 0]], + [[1, 0], [2, 0]], + ], + ids=[ + "Linearly dependent rows", + "Zero matrix", + "Identical rows", + "Linearly dependent columns", + "Zero row", + "Zero column", + ], + ) + def test_slogdet_singular_matrix(self, matrix): + a_np = numpy.array(matrix, dtype="float32") + a_dp = inp.array(a_np) + + sign_expected, logdet_expected = 
numpy.linalg.slogdet(a_np) + sign_result, logdet_result = inp.linalg.slogdet(a_dp) + + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + + def test_solve_errors(self): + a_dp = inp.array([[1, 2], [3, 5]], dtype="float32") + + # unsupported type + a_np = inp.asnumpy(a_dp) + assert_raises(TypeError, inp.linalg.slogdet, a_np) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 6de86cfd915f..5bd80e421518 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -1318,3 +1318,46 @@ def test_take(device): result_queue = result.get_array().sycl_queue assert_sycl_queue_equal(result_queue, expected_queue) + + +@pytest.mark.parametrize( + "shape, is_empty", + [ + ((2, 2), False), + ((3, 2, 2), False), + ((0, 0), True), + ((0, 2, 2), True), + ], + ids=[ + "(2, 2)", + "(3, 2, 2)", + "(0, 0)", + "(0, 2, 2)", + ], +) +@pytest.mark.parametrize( + "device", + valid_devices, + ids=[device.filter_string for device in valid_devices], +) +def test_slogdet(shape, is_empty, device): + if is_empty: + numpy_x = numpy.empty(shape, dtype=dpnp.default_float_type()) + else: + count_elem = numpy.prod(shape) + numpy_x = numpy.arange( + 1, count_elem + 1, dtype=dpnp.default_float_type() + ).reshape(shape) + + dpnp_x = dpnp.array(numpy_x, device=device) + + sign_result, logdet_result = dpnp.linalg.slogdet(dpnp_x) + sign_expected, logdet_expected = numpy.linalg.slogdet(numpy_x) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + assert_allclose(sign_expected, sign_result) + + sign_queue = sign_result.sycl_queue + logdet_queue = logdet_result.sycl_queue + + assert_sycl_queue_equal(sign_queue, dpnp_x.sycl_queue) + assert_sycl_queue_equal(logdet_queue, dpnp_x.sycl_queue) diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index bc0919643bd3..d51f3f2d1ef5 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -483,3 +483,34 @@ def test_take(usm_type_x, usm_type_ind): assert x.usm_type == usm_type_x assert ind.usm_type == usm_type_ind assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_ind]) + + +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize( + "shape, is_empty", + [ + ((2, 2), False), + ((3, 2, 2), False), + ((0, 0), True), + ((0, 2, 2), True), + ], + ids=[ + "(2, 2)", + "(3, 2, 2)", + "(0, 0)", + "(0, 2, 2)", + ], +) +def test_slogdet(shape, is_empty, usm_type): + if is_empty: + x = dp.empty(shape, dtype=dp.default_float_type(), usm_type=usm_type) + else: + count_elem = numpy.prod(shape) + x = dp.arange( + 1, count_elem + 1, dtype=dp.default_float_type(), usm_type=usm_type + ).reshape(shape) + + sign, logdet = dp.linalg.slogdet(x) + + assert x.usm_type == sign.usm_type + assert x.usm_type == logdet.usm_type From cde627da8ef1a737246e6c4b0127a311f79128c0 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 30 Nov 2023 13:13:58 +0100 Subject: [PATCH 22/55] Remove accidentally added file --- qwe.py | 39 --------------------------------------- 1 file changed, 39 deletions(-) delete mode 100644 qwe.py diff --git a/qwe.py b/qwe.py deleted file mode 100644 index ab03b4a3b5ad..000000000000 --- a/qwe.py +++ /dev/null @@ -1,39 +0,0 @@ -# import dpnp -# from dpnp.linalg. 
-# import numpy - -# a = numpy.arange(1,49,dtype='f4').reshape(2,2,3,4) -# a = numpy.arange(1,5,dtype='f4').reshape(2,2) -# a_dp = dpnp.array(a) -# print(numpy.linalg.svd(a,compute_uv=False)) -# print("===================") -# print(dpnp.linalg.svd(a_dp,compute_uv=False)) -# print("===================") -# a_dp = dpnp.array(a,device='cpu') -# print(dpnp.linalg.svd(a_dp,full_matrices=False)) - -# print(numpy.linalg.svd(a,full_matrices=False)) -# res_np = numpy.linalg.svd(a) -# print(res_np[0].shape) -# print(res_np[1].shape) -# print(res_np[2].shape) -# print("===================") -# print(dpnp.linalg.svd(a_dp, full_matrices=False)) -# res_dp = dpnp.linalg.svd(a_dp) -# print(res_dp[0].shape) -# print(res_dp[1].shape) -# print(res_dp[2].shape) -# print(a_dp) - - -from dpnp.linalg.dpnp_utils_linalg import _lu_factor -import dpnp -import numpy -import scipy - -a = numpy.array([[1, 2], [3, 4]],dtype="f4",order='C') -a_dp = dpnp.array(a) - -print(scipy.linalg.lu_factor(a)) -print("===========================") -print(_lu_factor(a_dp)) From 694870bf4255d9e92439efadd0f3602291b60b79 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 30 Nov 2023 13:15:32 +0100 Subject: [PATCH 23/55] Modify sign parameter calculation --- dpnp/linalg/dpnp_iface_linalg.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index cbc742d72440..2a03818b7072 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -721,6 +721,8 @@ def slogdet(a): lu, ipiv, dev_info = _lu_factor(a, res_type) + # return lu, ipiv, dev_info + # Transposing 'lu' to swap the last two axes for compatibility # with 'dpnp.diagonal' as it does not support 'axis1' and 'axis2' arguments. # TODO: Replace with 'dpnp.diagonal(lu, axis1=-2, axis2=-1)' when supported. 
@@ -740,13 +742,13 @@ def slogdet(a): if res_type.kind == "f": non_zero += dpnp.count_nonzero(diag < 0, axis=-1) - sign = -(1 ** (non_zero % 2)) + sign = (non_zero % 2) * -2 + 1 if res_type.kind == "c": sign = sign * dpnp.prod(diag / dpnp.abs(diag), axis=-1) sign = sign.astype(res_type) logdet = logdet.astype(logdet_dtype, copy=False) - singular = dev_info > 0 + singular = dpnp.array([dev_info > 0]) return ( dpnp.where(singular, res_type.type(0), sign).reshape(shape), dpnp.where(singular, logdet_dtype.type("-inf"), logdet).reshape(shape), From d85d00db79d4cfac0e1c4a134d323519b5fd1896 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 30 Nov 2023 13:24:20 +0100 Subject: [PATCH 24/55] Remove the old backend implementation of dpnp_det --- dpnp/backend/include/dpnp_iface_fptr.hpp | 2 -- dpnp/backend/kernels/dpnp_krnl_linalg.cpp | 9 ----- dpnp/dpnp_algo/dpnp_algo.pxd | 1 - dpnp/linalg/dpnp_algo_linalg.pyx | 42 ----------------------- 4 files changed, 54 deletions(-) diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index c56f38ffcb5e..7ab6852685a2 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -121,8 +121,6 @@ enum class DPNPFuncName : size_t DPNP_FN_DEGREES_EXT, /**< Used in numpy.degrees() impl, requires extra parameters */ DPNP_FN_DET, /**< Used in numpy.linalg.det() impl */ - DPNP_FN_DET_EXT, /**< Used in numpy.linalg.det() impl, requires extra - parameters */ DPNP_FN_DIAG, /**< Used in numpy.diag() impl */ DPNP_FN_DIAG_INDICES, /**< Used in numpy.diag_indices() impl */ DPNP_FN_DIAG_INDICES_EXT, /**< Used in numpy.diag_indices() impl, requires diff --git a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp index 5bf54c2b84d3..b3dd23f3c0bc 100644 --- a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp @@ -866,15 +866,6 @@ void func_map_init_linalg_func(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_DET][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_det_default_c}; - fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_det_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_det_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_det_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_det_ext_c}; - fmap[DPNPFuncName::DPNP_FN_INV][eft_INT][eft_INT] = { eft_DBL, (void *)dpnp_inv_default_c}; fmap[DPNPFuncName::DPNP_FN_INV][eft_LNG][eft_LNG] = { diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 80c6035d7a9f..3b0699231538 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -55,7 +55,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_DEGREES DPNP_FN_DEGREES_EXT DPNP_FN_DET - DPNP_FN_DET_EXT DPNP_FN_DIAG_INDICES DPNP_FN_DIAG_INDICES_EXT DPNP_FN_DIAGONAL diff --git a/dpnp/linalg/dpnp_algo_linalg.pyx b/dpnp/linalg/dpnp_algo_linalg.pyx index c86b869acd3c..ff5ea6253db6 100644 --- a/dpnp/linalg/dpnp_algo_linalg.pyx +++ b/dpnp/linalg/dpnp_algo_linalg.pyx @@ -47,7 +47,6 @@ cimport dpnp.dpnp_utils as utils __all__ = [ "dpnp_cholesky", "dpnp_cond", - "dpnp_det", "dpnp_eig", "dpnp_eigvals", "dpnp_inv", @@ -140,47 +139,6 @@ cpdef object dpnp_cond(object input, object p): return ret -cpdef utils.dpnp_descriptor dpnp_det(utils.dpnp_descriptor input): - cdef shape_type_c input_shape = 
input.shape - cdef size_t n = input.shape[-1] - cdef shape_type_c result_shape = (1,) - if input.ndim != 2: - result_shape = tuple((list(input.shape))[:-2]) - - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) - - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DET_EXT, param1_type, param1_type) - - input_obj = input.get_array() - - # create result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, - None, - device=input_obj.sycl_device, - usm_type=input_obj.usm_type, - sycl_queue=input_obj.sycl_queue) - - result_sycl_queue = result.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef custom_linalg_1in_1out_func_ptr_t func = kernel_data.ptr - - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - input.get_data(), - result.get_data(), - input_shape.data(), - input.ndim, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result - - cpdef tuple dpnp_eig(utils.dpnp_descriptor x1): cdef shape_type_c x1_shape = x1.shape From c4b9992da1abb4a95f71204b6b9cbf7d04096d05 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 30 Nov 2023 15:19:51 +0100 Subject: [PATCH 25/55] qwe --- perf.py | 32 ++++++++++++++++++++++++++++++++ qaz.py | 17 +++++++++++++++++ test.py | 27 +++++++++++++++++++++++++++ 3 files changed, 76 insertions(+) create mode 100644 perf.py create mode 100644 qaz.py create mode 100644 test.py diff --git a/perf.py b/perf.py new file mode 100644 index 000000000000..de88e2c24132 --- /dev/null +++ b/perf.py @@ -0,0 +1,32 @@ +import numpy +import dpnp + + +# na = numpy.random.randint(-10**4, 10**4, size=(4096,4096)) + +# a = numpy.array(na, dtype = "int32") + +# a_dp = dpnp.array(a,device='cpu') + +# def test(xp,dtype,device): +# na = numpy.random.randint(-10**4, 10**4, size=(4096,4096)) + +# a = numpy.array(na, dtype = dtype) +# a_dp = dpnp.array(a,device=device) + +# if xp == 0: +# numpy.linalg.slogdet(a) +# else: +# dpnp.linalg.slogdet(a_dp) + +def init(na, dtype): + # na = numpy.random.randint(-10**4, 10**4, size=(4096,4096)) + + a = numpy.array(na, dtype = dtype) + a_dp_cpu = dpnp.array(a,device='cpu') + a_dp_gpu = dpnp.array(a,device='gpu') + + numpy.linalg.slogdet(a) + dpnp.linalg.slogdet(a_dp_cpu) + + return a,a_dp_cpu,a_dp_gpu diff --git a/qaz.py b/qaz.py new file mode 100644 index 000000000000..755f6b25e447 --- /dev/null +++ b/qaz.py @@ -0,0 +1,17 @@ +import dpnp +import numpy + +a = numpy.array( + [ + [2, 3, 1, 4, 5], + [5, 6, 7, 8, 9], + [9, 7, 7, 2, 3], + [1, 4, 5, 1, 8], + [8, 9, 8, 5, 3], + ] +) + +a_dp = dpnp.array(a) + +print(numpy.linalg.slogdet(a)) +print(dpnp.linalg.slogdet(a_dp)) diff --git a/test.py b/test.py new file mode 100644 index 000000000000..fae55c253eb4 --- /dev/null +++ b/test.py @@ -0,0 +1,27 @@ +import dpnp +import numpy +import cProfile, pstats, io + + +# na = numpy.random.randint(-10**4, 10**4, size=(4096,4096)) +na = numpy.random.randint(-10**4, 10**4, size=(8192,8192)) +a = numpy.array(na, dtype = "int32") +a_dp = dpnp.array(a, device='cpu') + +a_stride = a[::2,::2] +a_dp_stride = a_dp[::2,::2] +dpnp.linalg.slogdet(a_dp_stride) + +pr = cProfile.Profile() +pr.enable() +# res = numpy.linalg.slogdet(a) +res = dpnp.linalg.slogdet(a_dp_stride) +pr.disable() + +s = io.StringIO() +sortBy = pstats.SortKey.CUMULATIVE + +ps = pstats.Stats(pr, 
stream=s).sort_stats(sortBy)
+ps.print_stats()
+
+print(s.getvalue())
From 78b98e73911e19a95e7ff4044724aa4487395d94 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev
Date: Fri, 15 Dec 2023 12:52:38 +0100
Subject: [PATCH 26/55] Keep lexicographical order

---
 dpnp/backend/extensions/lapack/lapack_py.cpp  | 28 +++----
 .../extensions/lapack/types_matrix.hpp        | 75 ++++++++++---------
 2 files changed, 52 insertions(+), 51 deletions(-)

diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp
index 71fcb0067aab..acf26989edd9 100644
--- a/dpnp/backend/extensions/lapack/lapack_py.cpp
+++ b/dpnp/backend/extensions/lapack/lapack_py.cpp
@@ -71,20 +71,6 @@ PYBIND11_MODULE(_lapack_impl, m)
           py::arg("sycl_queue"), py::arg("coeff_matrix"),
           py::arg("dependent_vals"), py::arg("depends") = py::list());
 
-    m.def("_heevd", &lapack_ext::heevd,
-          "Call `heevd` from OneMKL LAPACK library to return "
-          "the eigenvalues and eigenvectors of a complex Hermitian matrix",
-          py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"),
-          py::arg("eig_vecs"), py::arg("eig_vals"),
-          py::arg("depends") = py::list());
-
-    m.def("_syevd", &lapack_ext::syevd,
-          "Call `syevd` from OneMKL LAPACK library to return "
-          "the eigenvalues and eigenvectors of a real symmetric matrix",
-          py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"),
-          py::arg("eig_vecs"), py::arg("eig_vals"),
-          py::arg("depends") = py::list());
-
     m.def("_getrf", &lapack_ext::getrf,
           "Call `getrf` from OneMKL LAPACK library to return "
           "the LU factorization of a general n x n matrix",
@@ -99,4 +85,18 @@ PYBIND11_MODULE(_lapack_impl, m)
           py::arg("dev_info_array"), py::arg("n"), py::arg("stride_a"),
           py::arg("stride_ipiv"), py::arg("batch_size"),
           py::arg("depends") = py::list());
+
+    m.def("_heevd", &lapack_ext::heevd,
+          "Call `heevd` from OneMKL LAPACK library to return "
+          "the eigenvalues and eigenvectors of a complex Hermitian matrix",
+          py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"),
+          py::arg("eig_vecs"), py::arg("eig_vals"),
+          py::arg("depends") = py::list());
+
+    m.def("_syevd", &lapack_ext::syevd,
+          "Call `syevd` from OneMKL LAPACK library to return "
+          "the eigenvalues and eigenvectors of a real symmetric matrix",
+          py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"),
+          py::arg("eig_vecs"), py::arg("eig_vals"),
+          py::arg("depends") = py::list());
 }
diff --git a/dpnp/backend/extensions/lapack/types_matrix.hpp b/dpnp/backend/extensions/lapack/types_matrix.hpp
index ecce0a0c0bec..59198e975c8a 100644
--- a/dpnp/backend/extensions/lapack/types_matrix.hpp
+++ b/dpnp/backend/extensions/lapack/types_matrix.hpp
@@ -69,43 +69,6 @@ struct GesvTypePairSupportFactory
         // fall-through
         dpctl_td_ns::NotDefinedEntry>::is_defined;
 };
-/**
- * @brief A factory to define pairs of supported types for which
- * MKL LAPACK library provides support in oneapi::mkl::lapack::heevd
- * function.
- *
- * @tparam T Type of array containing input matrix A and an output array with
- * eigenvectors.
- * @tparam RealT Type of output array containing eigenvalues of A.
- */
-template <typename T, typename RealT>
-struct HeevdTypePairSupportFactory
-{
-    static constexpr bool is_defined = std::disjunction<
-        dpctl_td_ns::
-            TypePairDefinedEntry<T, std::complex<double>, RealT, double>,
-        dpctl_td_ns::TypePairDefinedEntry<T, std::complex<float>, RealT, float>,
-        // fall-through
-        dpctl_td_ns::NotDefinedEntry>::is_defined;
-};
-
-/**
- * @brief A factory to define pairs of supported types for which
- * MKL LAPACK library provides support in oneapi::mkl::lapack::syevd
- * function. 
- * - * @tparam T Type of array containing input matrix A and an output arrays with - * eigenvectors and eigenvectors. - */ -template -struct SyevdTypePairSupportFactory -{ - static constexpr bool is_defined = std::disjunction< - dpctl_td_ns::TypePairDefinedEntry, - dpctl_td_ns::TypePairDefinedEntry, - // fall-through - dpctl_td_ns::NotDefinedEntry>::is_defined; -}; /** * @brief A factory to define pairs of supported types for which @@ -158,6 +121,44 @@ struct GetrfBatchTypePairSupportFactory // fall-through dpctl_td_ns::NotDefinedEntry>::is_defined; }; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::heevd + * function. + * + * @tparam T Type of array containing input matrix A and an output array with + * eigenvectors. + * @tparam RealT Type of output array containing eigenvalues of A. + */ +template +struct HeevdTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns:: + TypePairDefinedEntry, RealT, double>, + dpctl_td_ns::TypePairDefinedEntry, RealT, float>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::syevd + * function. + * + * @tparam T Type of array containing input matrix A and an output arrays with + * eigenvectors and eigenvectors. + */ +template +struct SyevdTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; } // namespace types } // namespace lapack } // namespace ext From 46a996566fc2006d510b245c1472329abd9c3b20 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 15 Dec 2023 13:12:13 +0100 Subject: [PATCH 27/55] Add dpnp_slogdet to dpnp_utils_linalg --- dpnp/linalg/dpnp_iface_linalg.py | 94 ++------------------------------ dpnp/linalg/dpnp_utils_linalg.py | 67 ++++++++++++++++++++++- 2 files changed, 71 insertions(+), 90 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 3c91e7ff294e..384543e268ae 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -48,10 +48,10 @@ from dpnp.linalg.dpnp_algo_linalg import * from .dpnp_utils_linalg import ( - _lu_factor, check_stacked_2d, check_stacked_square, dpnp_eigh, + dpnp_slogdet, dpnp_solve, ) @@ -721,92 +721,8 @@ def slogdet(a): """ - if not dpnp.is_supported_array_type(a): - raise TypeError( - "An array must be any of supported type, but got {}".format(type(a)) - ) - - # TODO: use dpnp.linalg.LinAlgError - if a.ndim < 2: - raise ValueError( - f"{a.ndim}-dimensional array given. 
The input " - "array must be at least two-dimensional" - ) - - n, m = a.shape[-2:] - # TODO: use dpnp.linalg.LinAlgError - if m != n: - raise ValueError("Last 2 dimensions of the input array must be square") - - exec_q = a.sycl_queue - a_usm_type = a.usm_type - a_sycl_queue = a.sycl_queue - # dtype, sign_dtype = _util.linalg_common_type(a) - # TODO: Use linalg_common_type from #1598 - if dpnp.issubdtype(a.dtype, dpnp.floating): - res_type = ( - a.dtype if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 - ) - elif dpnp.issubdtype(a.dtype, dpnp.complexfloating): - res_type = ( - a.dtype if exec_q.sycl_device.has_aspect_fp64 else dpnp.complex64 - ) - else: - res_type = ( - dpnp.float64 if exec_q.sycl_device.has_aspect_fp64 else dpnp.float32 - ) - - res_type = dpnp.dtype(res_type) - logdet_dtype = dpnp.dtype(res_type.char.lower()) - - a_shape = a.shape - shape = a_shape[:-2] - n = a_shape[-2] - - if a.size == 0: - # empty batch (result is empty, too) or empty matrices det([[]]) == 1 - sign = dpnp.ones( - shape, dtype=res_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue - ) - logdet = dpnp.zeros( - shape, - dtype=logdet_dtype, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - return sign, logdet - - lu, ipiv, dev_info = _lu_factor(a, res_type) - - # return lu, ipiv, dev_info - - # Transposing 'lu' to swap the last two axes for compatibility - # with 'dpnp.diagonal' as it does not support 'axis1' and 'axis2' arguments. - # TODO: Replace with 'dpnp.diagonal(lu, axis1=-2, axis2=-1)' when supported. - lu_transposed = lu.transpose(-2, -1, *range(lu.ndim - 2)) - diag = dpnp.diagonal(lu_transposed) - - logdet = dpnp.log(dpnp.abs(diag)).sum(axis=-1) + dpnp.check_supported_arrays_type(a) + check_stacked_2d(a) + check_stacked_square(a) - # ipiv is 1-origin - non_zero = dpnp.count_nonzero( - ipiv - != dpnp.arange( - 1, n + 1, usm_type=ipiv.usm_type, sycl_queue=ipiv.sycl_queue - ), - axis=-1, - ) - if res_type.kind == "f": - non_zero += dpnp.count_nonzero(diag < 0, axis=-1) - - sign = (non_zero % 2) * -2 + 1 - if res_type.kind == "c": - sign = sign * dpnp.prod(diag / dpnp.abs(diag), axis=-1) - - sign = sign.astype(res_type) - logdet = logdet.astype(logdet_dtype, copy=False) - singular = dpnp.array([dev_info > 0]) - return ( - dpnp.where(singular, res_type.type(0), sign).reshape(shape), - dpnp.where(singular, logdet_dtype.type("-inf"), logdet).reshape(shape), - ) + return dpnp_slogdet(a) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 0ad62bdb9f06..83fd83a9f4b6 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -35,11 +35,11 @@ from dpnp.dpnp_utils import get_usm_allocations __all__ = [ - "_lu_factor", "check_stacked_2d", "check_stacked_square", "dpnp_eigh", "dpnp_solve", + "dpnp_slogdet", ] _jobz = {"N": 0, "V": 1} @@ -619,3 +619,68 @@ def dpnp_solve(a, b): a_ht_copy_ev.wait() return b_f + + +def dpnp_slogdet(a): + """ + dpnp_slogdet(a) + + Returns sign and logarithm of the determinant of `a` array. 
+ + """ + + a_usm_type = a.usm_type + a_sycl_queue = a.sycl_queue + + res_type = _common_type(a) + logdet_dtype = dpnp.dtype(res_type.char.lower()) + + a_shape = a.shape + shape = a_shape[:-2] + n = a_shape[-2] + + if a.size == 0: + # empty batch (result is empty, too) or empty matrices det([[]]) == 1 + sign = dpnp.ones( + shape, dtype=res_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + logdet = dpnp.zeros( + shape, + dtype=logdet_dtype, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + return sign, logdet + + lu, ipiv, dev_info = _lu_factor(a, res_type) + + # Transposing 'lu' to swap the last two axes for compatibility + # with 'dpnp.diagonal' as it does not support 'axis1' and 'axis2' arguments. + # TODO: Replace with 'dpnp.diagonal(lu, axis1=-2, axis2=-1)' when supported. + lu_transposed = lu.transpose(-2, -1, *range(lu.ndim - 2)) + diag = dpnp.diagonal(lu_transposed) + + logdet = dpnp.log(dpnp.abs(diag)).sum(axis=-1) + + # ipiv is 1-origin + non_zero = dpnp.count_nonzero( + ipiv + != dpnp.arange( + 1, n + 1, usm_type=ipiv.usm_type, sycl_queue=ipiv.sycl_queue + ), + axis=-1, + ) + if res_type.kind == "f": + non_zero += dpnp.count_nonzero(diag < 0, axis=-1) + + sign = (non_zero % 2) * -2 + 1 + if res_type.kind == "c": + sign = sign * dpnp.prod(diag / dpnp.abs(diag), axis=-1) + + sign = sign.astype(res_type) + logdet = logdet.astype(logdet_dtype, copy=False) + singular = dpnp.array([dev_info > 0]) + return ( + dpnp.where(singular, res_type.type(0), sign).reshape(shape), + dpnp.where(singular, logdet_dtype.type("-inf"), logdet).reshape(shape), + ) From da16383c55b32a80c2a18c9c327c2d92638577c8 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 15 Dec 2023 13:14:15 +0100 Subject: [PATCH 28/55] Move _lu_factor above --- dpnp/linalg/dpnp_utils_linalg.py | 256 +++++++++++++++---------------- 1 file changed, 128 insertions(+), 128 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 83fd83a9f4b6..ae29c30cb59d 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -175,134 +175,6 @@ def _common_inexact_type(default_dtype, *dtypes): return dpnp.result_type(*inexact_dtypes) -def dpnp_eigh(a, UPLO): - """ - dpnp_eigh(a, UPLO) - - Return the eigenvalues and eigenvectors of a complex Hermitian - (conjugate symmetric) or a real symmetric matrix. - - The main calculation is done by calling an extension function - for LAPACK library of OneMKL. Depending on input type of `a` array, - it will be either ``heevd`` (for complex types) or ``syevd`` (for others). 
- - """ - - a_usm_type = a.usm_type - a_sycl_queue = a.sycl_queue - a_order = "C" if a.flags.c_contiguous else "F" - a_usm_arr = dpnp.get_usm_ndarray(a) - - # 'V' means both eigenvectors and eigenvalues will be calculated - jobz = _jobz["V"] - uplo = _upper_lower[UPLO] - - # get resulting type of arrays with eigenvalues and eigenvectors - a_dtype = a.dtype - lapack_func = "_syevd" - if dpnp.issubdtype(a_dtype, dpnp.complexfloating): - lapack_func = "_heevd" - v_type = a_dtype - w_type = dpnp.float64 if a_dtype == dpnp.complex128 else dpnp.float32 - elif dpnp.issubdtype(a_dtype, dpnp.floating): - v_type = w_type = a_dtype - elif a_sycl_queue.sycl_device.has_aspect_fp64: - v_type = w_type = dpnp.float64 - else: - v_type = w_type = dpnp.float32 - - if a.ndim > 2: - w = dpnp.empty( - a.shape[:-1], - dtype=w_type, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - - # need to loop over the 1st dimension to get eigenvalues and eigenvectors of 3d matrix A - op_count = a.shape[0] - if op_count == 0: - return w, dpnp.empty_like(a, dtype=v_type) - - eig_vecs = [None] * op_count - ht_copy_ev = [None] * op_count - ht_lapack_ev = [None] * op_count - for i in range(op_count): - # oneMKL LAPACK assumes fortran-like array as input, so - # allocate a memory with 'F' order for dpnp array of eigenvectors - eig_vecs[i] = dpnp.empty_like(a[i], order="F", dtype=v_type) - - # use DPCTL tensor function to fill the array of eigenvectors with content of input array - ht_copy_ev[i], copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr[i], - dst=eig_vecs[i].get_array(), - sycl_queue=a_sycl_queue, - ) - - # call LAPACK extension function to get eigenvalues and eigenvectors of a portion of matrix A - ht_lapack_ev[i], _ = getattr(li, lapack_func)( - a_sycl_queue, - jobz, - uplo, - eig_vecs[i].get_array(), - w[i].get_array(), - depends=[copy_ev], - ) - - for i in range(op_count): - ht_lapack_ev[i].wait() - ht_copy_ev[i].wait() - - # combine the list of eigenvectors into a single array - v = dpnp.array(eig_vecs, order=a_order) - return w, v - else: - # oneMKL LAPACK assumes fortran-like array as input, so - # allocate a memory with 'F' order for dpnp array of eigenvectors - v = dpnp.empty_like(a, order="F", dtype=v_type) - - # use DPCTL tensor function to fill the array of eigenvectors with content of input array - ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr, dst=v.get_array(), sycl_queue=a_sycl_queue - ) - - # allocate a memory for dpnp array of eigenvalues - w = dpnp.empty( - a.shape[:-1], - dtype=w_type, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - - # call LAPACK extension function to get eigenvalues and eigenvectors of matrix A - ht_lapack_ev, lapack_ev = getattr(li, lapack_func)( - a_sycl_queue, - jobz, - uplo, - v.get_array(), - w.get_array(), - depends=[copy_ev], - ) - - if a_order != "F": - # need to align order of eigenvectors with one of input matrix A - out_v = dpnp.empty_like(v, order=a_order) - ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( - src=v.get_array(), - dst=out_v.get_array(), - sycl_queue=a_sycl_queue, - depends=[lapack_ev], - ) - ht_copy_out_ev.wait() - else: - out_v = v - - ht_lapack_ev.wait() - ht_copy_ev.wait() - - return w, out_v - - def _lu_factor(a, res_type): """Compute pivoted LU decomposition. 
@@ -489,6 +361,134 @@ def _lu_factor(a, res_type): return (a_h, ipiv_h, dev_info_h) +def dpnp_eigh(a, UPLO): + """ + dpnp_eigh(a, UPLO) + + Return the eigenvalues and eigenvectors of a complex Hermitian + (conjugate symmetric) or a real symmetric matrix. + + The main calculation is done by calling an extension function + for LAPACK library of OneMKL. Depending on input type of `a` array, + it will be either ``heevd`` (for complex types) or ``syevd`` (for others). + + """ + + a_usm_type = a.usm_type + a_sycl_queue = a.sycl_queue + a_order = "C" if a.flags.c_contiguous else "F" + a_usm_arr = dpnp.get_usm_ndarray(a) + + # 'V' means both eigenvectors and eigenvalues will be calculated + jobz = _jobz["V"] + uplo = _upper_lower[UPLO] + + # get resulting type of arrays with eigenvalues and eigenvectors + a_dtype = a.dtype + lapack_func = "_syevd" + if dpnp.issubdtype(a_dtype, dpnp.complexfloating): + lapack_func = "_heevd" + v_type = a_dtype + w_type = dpnp.float64 if a_dtype == dpnp.complex128 else dpnp.float32 + elif dpnp.issubdtype(a_dtype, dpnp.floating): + v_type = w_type = a_dtype + elif a_sycl_queue.sycl_device.has_aspect_fp64: + v_type = w_type = dpnp.float64 + else: + v_type = w_type = dpnp.float32 + + if a.ndim > 2: + w = dpnp.empty( + a.shape[:-1], + dtype=w_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + + # need to loop over the 1st dimension to get eigenvalues and eigenvectors of 3d matrix A + op_count = a.shape[0] + if op_count == 0: + return w, dpnp.empty_like(a, dtype=v_type) + + eig_vecs = [None] * op_count + ht_copy_ev = [None] * op_count + ht_lapack_ev = [None] * op_count + for i in range(op_count): + # oneMKL LAPACK assumes fortran-like array as input, so + # allocate a memory with 'F' order for dpnp array of eigenvectors + eig_vecs[i] = dpnp.empty_like(a[i], order="F", dtype=v_type) + + # use DPCTL tensor function to fill the array of eigenvectors with content of input array + ht_copy_ev[i], copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr[i], + dst=eig_vecs[i].get_array(), + sycl_queue=a_sycl_queue, + ) + + # call LAPACK extension function to get eigenvalues and eigenvectors of a portion of matrix A + ht_lapack_ev[i], _ = getattr(li, lapack_func)( + a_sycl_queue, + jobz, + uplo, + eig_vecs[i].get_array(), + w[i].get_array(), + depends=[copy_ev], + ) + + for i in range(op_count): + ht_lapack_ev[i].wait() + ht_copy_ev[i].wait() + + # combine the list of eigenvectors into a single array + v = dpnp.array(eig_vecs, order=a_order) + return w, v + else: + # oneMKL LAPACK assumes fortran-like array as input, so + # allocate a memory with 'F' order for dpnp array of eigenvectors + v = dpnp.empty_like(a, order="F", dtype=v_type) + + # use DPCTL tensor function to fill the array of eigenvectors with content of input array + ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=v.get_array(), sycl_queue=a_sycl_queue + ) + + # allocate a memory for dpnp array of eigenvalues + w = dpnp.empty( + a.shape[:-1], + dtype=w_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + + # call LAPACK extension function to get eigenvalues and eigenvectors of matrix A + ht_lapack_ev, lapack_ev = getattr(li, lapack_func)( + a_sycl_queue, + jobz, + uplo, + v.get_array(), + w.get_array(), + depends=[copy_ev], + ) + + if a_order != "F": + # need to align order of eigenvectors with one of input matrix A + out_v = dpnp.empty_like(v, order=a_order) + ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=v.get_array(), + 
dst=out_v.get_array(), + sycl_queue=a_sycl_queue, + depends=[lapack_ev], + ) + ht_copy_out_ev.wait() + else: + out_v = v + + ht_lapack_ev.wait() + ht_copy_ev.wait() + + return w, out_v + + def dpnp_solve(a, b): """ dpnp_solve(a, b) From 4e0e1831a9bb4adfe825abf0c83158821af7510b Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 15 Dec 2023 13:54:39 +0100 Subject: [PATCH 29/55] A minor update --- dpnp/backend/extensions/lapack/getrf.cpp | 21 ++++++++------- dpnp/backend/extensions/lapack/getrf.hpp | 2 +- .../backend/extensions/lapack/getrf_batch.cpp | 27 +++++++++---------- dpnp/backend/extensions/lapack/lapack_py.cpp | 4 +-- dpnp/linalg/dpnp_utils_linalg.py | 4 +-- tests/test_sycl_queue.py | 4 +-- 6 files changed, 31 insertions(+), 31 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index a83d94ac5069..3fe7927a9a01 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -72,7 +72,7 @@ static sycl::event getrf_impl(sycl::queue exec_q, T *a = reinterpret_cast(in_a); const std::int64_t scratchpad_size = - oneapi::mkl::lapack::getrf_scratchpad_size(exec_q, n, n, lda); + mkl_lapack::getrf_scratchpad_size(exec_q, n, n, lda); T *scratchpad = nullptr; std::stringstream error_msg; @@ -82,13 +82,16 @@ static sycl::event getrf_impl(sycl::queue exec_q, try { scratchpad = sycl::malloc_device(scratchpad_size, exec_q); - getrf_event = oneapi::mkl::lapack::getrf( + getrf_event = mkl_lapack::getrf( exec_q, - n, // Order of the square matrix; (0 ≤ n). - n, // Order of the square matrix; (0 ≤ n). - a, // Pointer to the n-by-n matrix. - lda, // The leading dimension of `a`. - ipiv, // Pointer to the array of pivot indices. + n, // The order of the square matrix A (0 ≤ n). + // It must be a non-negative integer. + n, // The number of columns in the square matrix A (0 ≤ n). + // It must be a non-negative integer. + a, // Pointer to the square matrix A (n x n). + lda, // The leading dimension of matrix A. + // It must be at least max(1, n). + ipiv, // Pointer to the output array of pivot indices. scratchpad, // Pointer to scratchpad memory to be used by MKL // routine for storing intermediate results. 
scratchpad_size, depends);
@@ -132,10 +135,10 @@ static sycl::event getrf_impl(sycl::queue exec_q,
 
 std::pair
     getrf(sycl::queue q,
-          const std::int64_t n,
           dpctl::tensor::usm_ndarray a_array,
           dpctl::tensor::usm_ndarray ipiv_array,
           dpctl::tensor::usm_ndarray dev_info_array,
+          const std::int64_t n,
           const std::vector &depends)
 {
 
@@ -153,8 +156,6 @@ std::pair
     char *a_array_data = a_array.get_data();
     const std::int64_t lda = std::max(1UL, n);
 
-    // check valid ipiv
-
     char *ipiv_array_data = ipiv_array.get_data();
     std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data);
 
diff --git a/dpnp/backend/extensions/lapack/getrf.hpp b/dpnp/backend/extensions/lapack/getrf.hpp
index e66edefc52cd..38db17bd9a03 100644
--- a/dpnp/backend/extensions/lapack/getrf.hpp
+++ b/dpnp/backend/extensions/lapack/getrf.hpp
@@ -40,10 +40,10 @@ namespace lapack
 {
 extern std::pair
     getrf(sycl::queue exec_q,
-          const std::int64_t n,
           dpctl::tensor::usm_ndarray a_array,
           dpctl::tensor::usm_ndarray ipiv_array,
           dpctl::tensor::usm_ndarray dev_info_array,
+          const std::int64_t n,
           const std::vector &depends = {});
 
 extern std::pair
diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp
index b552358965d1..b15fac74efee 100644
--- a/dpnp/backend/extensions/lapack/getrf_batch.cpp
+++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp
@@ -80,8 +80,8 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q,
     T *a = reinterpret_cast(in_a);
 
     const std::int64_t scratchpad_size =
-        oneapi::mkl::lapack::getrf_batch_scratchpad_size(
-            exec_q, n, n, lda, stride_a, stride_ipiv, batch_size);
+        mkl_lapack::getrf_batch_scratchpad_size(exec_q, n, n, lda, stride_a,
+                                                stride_ipiv, batch_size);
     T *scratchpad = nullptr;
 
     std::stringstream error_msg;
@@ -91,19 +91,20 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q,
     try {
         scratchpad = sycl::malloc_device(scratchpad_size, exec_q);
 
-        getrf_batch_event = oneapi::mkl::lapack::getrf_batch(
+        getrf_batch_event = mkl_lapack::getrf_batch(
             exec_q,
-            n, // Order of each square matrix in the batch; (0 ≤ n).
-            n, // Order of each square matrix in the batch; (0 ≤ n).
-            a, // Pointer to the batch of matrices.
-            lda, // The leading dimension of `a`.
-            stride_a, // Stride between matrices: Element spacing between
-                      // matrices in `a`.
-            ipiv, // Pivot indices: Pointer to the pivot indices for each
-                  // matrix.
+            n, // The order of each square matrix in the batch; (0 ≤ n).
+               // It must be a non-negative integer.
+            n, // The number of columns in each matrix in the batch; (0 ≤ n).
+               // It must be a non-negative integer.
+            a, // Pointer to the batch of square matrices, each of size (n x n).
+            lda, // The leading dimension of each matrix in the batch.
+            stride_a, // Stride between consecutive matrices in the batch.
+            ipiv, // Pointer to the array of pivot indices for each matrix in
+                  // the batch.
             stride_ipiv, // Stride between pivot indices: Spacing between pivot
                          // arrays in 'ipiv'.
-            batch_size, // Total number of matrices in the batch.
+            batch_size, // The total number of matrices in the batch.
             scratchpad, // Pointer to scratchpad memory to be used by MKL
                         // routine for storing intermediate results. 
scratchpad_size, depends);
@@ -172,8 +173,6 @@ std::pair
     char *a_array_data = a_array.get_data();
     const std::int64_t lda = std::max(1UL, n);
 
-    // check valid ipiv
-
     char *ipiv_array_data = ipiv_array.get_data();
     std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data);
 
diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp
index acf26989edd9..2b5ad61678f6 100644
--- a/dpnp/backend/extensions/lapack/lapack_py.cpp
+++ b/dpnp/backend/extensions/lapack/lapack_py.cpp
@@ -74,8 +74,8 @@ PYBIND11_MODULE(_lapack_impl, m)
     m.def("_getrf", &lapack_ext::getrf,
           "Call `getrf` from OneMKL LAPACK library to return "
           "the LU factorization of a general n x n matrix",
-          py::arg("sycl_queue"), py::arg("n"), py::arg("a_array"),
-          py::arg("ipiv_array"), py::arg("dev_info_array"),
+          py::arg("sycl_queue"), py::arg("a_array"), py::arg("ipiv_array"),
+          py::arg("dev_info_array"), py::arg("n"),
           py::arg("depends") = py::list());
 
     m.def("_getrf_batch", &lapack_ext::getrf_batch,
diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py
index ae29c30cb59d..41629f5c4430 100644
--- a/dpnp/linalg/dpnp_utils_linalg.py
+++ b/dpnp/linalg/dpnp_utils_linalg.py
@@ -305,10 +305,10 @@ def _lu_factor(a, res_type):
         # to perform LU decomposition on each batch in 'a_vecs[i]'
         ht_lapack_ev[i], _ = li._getrf(
             a_sycl_queue,
-            n,
             a_vecs[i].get_array(),
             ipiv_vecs[i].get_array(),
             dev_info_vecs[i].get_array(),
+            n,
             [a_copy_ev],
         )
 
@@ -345,10 +345,10 @@ def _lu_factor(a, res_type):
     # to perform LU decomposition on the input matrix
     ht_lapack_ev, _ = li._getrf(
         a_sycl_queue,
-        n,
         a_h.get_array(),
         ipiv_h.get_array(),
         dev_info_h.get_array(),
+        n,
         [a_copy_ev],
     )
 
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index 90ccd2a4525d..aca7030c85cb 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -1393,11 +1393,11 @@ def test_solve(device):
 )
 def test_slogdet(shape, is_empty, device):
     if is_empty:
-        numpy_x = numpy.empty(shape, dtype=dpnp.default_float_type())
+        numpy_x = numpy.empty(shape, dtype=dpnp.default_float_type(device))
     else:
         count_elem = numpy.prod(shape)
         numpy_x = numpy.arange(
-            1, count_elem + 1, dtype=dpnp.default_float_type()
+            1, count_elem + 1, dtype=dpnp.default_float_type(device)
         ).reshape(shape)
 
     dpnp_x = dpnp.array(numpy_x, device=device)

From 80d8188215f6f388f8591d861715a253b56dbfbd Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev
Date: Fri, 15 Dec 2023 19:44:02 +0100
Subject: [PATCH 30/55] Minor changes for _lu_factor

---
 dpnp/linalg/dpnp_utils_linalg.py | 14 +++----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py
index 41629f5c4430..8323117a0b70 100644
--- a/dpnp/linalg/dpnp_utils_linalg.py
+++ b/dpnp/linalg/dpnp_utils_linalg.py
@@ -194,6 +194,8 @@ def _lu_factor(a, res_type):
         piv (dpnp.ndarray):
             1-origin pivot indices with dimension
             ``(..., N)``.
+        dev_info (dpnp.ndarray):
+            ``getrf`` or `getrf_batch` info with dimension ``(...)``.
 
     See Also
     --------
@@ -201,17 +203,7 @@ def _lu_factor(a, res_type):
 
     """
 
-    # TODO: use dpnp.linalg.LinAlgError
-    if a.ndim < 2:
-        raise ValueError(
-            f"{a.ndim}-dimensional array given. 
The input " - "array must be at least two-dimensional" - ) - - n, m = a.shape[-2:] - # TODO: use dpnp.linalg.LinAlgError - if m != n: - raise ValueError("Last 2 dimensions of the input array must be square") + n = a.shape[-2] a_sycl_queue = a.sycl_queue a_usm_type = a.usm_type From 628dd90795409168044b24185757d89ae45af620 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 15 Dec 2023 19:59:33 +0100 Subject: [PATCH 31/55] Remove trash files --- perf.py | 32 -------------------------------- qaz.py | 17 ----------------- test.py | 27 --------------------------- 3 files changed, 76 deletions(-) delete mode 100644 perf.py delete mode 100644 qaz.py delete mode 100644 test.py diff --git a/perf.py b/perf.py deleted file mode 100644 index de88e2c24132..000000000000 --- a/perf.py +++ /dev/null @@ -1,32 +0,0 @@ -import numpy -import dpnp - - -# na = numpy.random.randint(-10**4, 10**4, size=(4096,4096)) - -# a = numpy.array(na, dtype = "int32") - -# a_dp = dpnp.array(a,device='cpu') - -# def test(xp,dtype,device): -# na = numpy.random.randint(-10**4, 10**4, size=(4096,4096)) - -# a = numpy.array(na, dtype = dtype) -# a_dp = dpnp.array(a,device=device) - -# if xp == 0: -# numpy.linalg.slogdet(a) -# else: -# dpnp.linalg.slogdet(a_dp) - -def init(na, dtype): - # na = numpy.random.randint(-10**4, 10**4, size=(4096,4096)) - - a = numpy.array(na, dtype = dtype) - a_dp_cpu = dpnp.array(a,device='cpu') - a_dp_gpu = dpnp.array(a,device='gpu') - - numpy.linalg.slogdet(a) - dpnp.linalg.slogdet(a_dp_cpu) - - return a,a_dp_cpu,a_dp_gpu diff --git a/qaz.py b/qaz.py deleted file mode 100644 index 755f6b25e447..000000000000 --- a/qaz.py +++ /dev/null @@ -1,17 +0,0 @@ -import dpnp -import numpy - -a = numpy.array( - [ - [2, 3, 1, 4, 5], - [5, 6, 7, 8, 9], - [9, 7, 7, 2, 3], - [1, 4, 5, 1, 8], - [8, 9, 8, 5, 3], - ] -) - -a_dp = dpnp.array(a) - -print(numpy.linalg.slogdet(a)) -print(dpnp.linalg.slogdet(a_dp)) diff --git a/test.py b/test.py deleted file mode 100644 index fae55c253eb4..000000000000 --- a/test.py +++ /dev/null @@ -1,27 +0,0 @@ -import dpnp -import numpy -import cProfile, pstats, io - - -# na = numpy.random.randint(-10**4, 10**4, size=(4096,4096)) -na = numpy.random.randint(-10**4, 10**4, size=(8192,8192)) -a = numpy.array(na, dtype = "int32") -a_dp = dpnp.array(a, device='cpu') - -a_stride = a[::2,::2] -a_dp_stride = a_dp[::2,::2] -dpnp.linalg.slogdet(a_dp_stride) - -pr = cProfile.Profile() -pr.enable() -# res = numpy.linalg.slogdet(a) -res = dpnp.linalg.slogdet(a_dp_stride) -pr.disable() - -s = io.StringIO() -sortBy = pstats.SortKey.CUMULATIVE - -ps = pstats.Stats(pr, stream=s).sort_stats(sortBy) -ps.print_stats() - -print(s.getvalue()) From a8db460f9af4c15cf357af0232795abc24e2001e Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 18 Dec 2023 15:20:07 +0100 Subject: [PATCH 32/55] Use getrf_batch only on CPU --- dpnp/linalg/dpnp_utils_linalg.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 8323117a0b70..54053e625a42 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -209,7 +209,10 @@ def _lu_factor(a, res_type): a_usm_type = a.usm_type # TODO: Find out at which array sizes the best performance is obtained - use_batch = True + # getrf_batch implementation shows slow results with large arrays on GPU. + # Use getrf_batch only on CPU. 
+ # On GPU call getrf for each two-dimensional array by loop + use_batch = a.sycl_device.has_aspect_cpu if a.ndim > 2: orig_shape = a.shape From e3cd5c4c02712b1fe81b15d916f1446c0d5e92c2 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 18 Dec 2023 15:33:53 +0100 Subject: [PATCH 33/55] Update tests for dpnp.linalg.slogdet --- tests/test_linalg.py | 2 +- .../cupy/linalg_tests/test_norms.py | 56 ++++++++++--------- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 6847bb63e290..992f46fafc07 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -676,7 +676,7 @@ def test_slogdet_strides(self): assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) # TODO: remove skipif when MKLD-16626 is resolved - @pytest.mark.skipif(is_cpu_device(), reason="MKL bug MKLD-16626") + @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") @pytest.mark.parametrize( "matrix", [ diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index ea6f3ec3941e..05dbd4da2e0d 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -49,36 +49,40 @@ def test_det_empty_matrices(self, xp, dtype): def test_det_different_last_two_dims(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2, 3, 2), xp, dtype) - # TODO: replace ValueError with dpnp.linalg.LinAlgError - with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + with pytest.raises( + (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) + ): xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) def test_det_different_last_two_dims_empty_batch(self, dtype): for xp in (numpy, cupy): a = xp.empty((0, 3, 2), dtype=dtype) - # TODO: replace ValueError with dpnp.linalg.LinAlgError - with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + with pytest.raises( + (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) + ): xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) def test_det_one_dim(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2,), xp, dtype) - # TODO: replace ValueError with dpnp.linalg.LinAlgError - with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + with pytest.raises( + (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) + ): xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) def test_det_zero_dim(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((), xp, dtype) - # TODO: replace ValueError with dpnp.linalg.LinAlgError - with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + with pytest.raises( + (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) + ): xp.linalg.det(a) # TODO: remove skipif when MKLD-16626 is resolved - @pytest.mark.skipif(is_cpu_device(), reason="MKL bug MKLD-16626") + @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_singular(self, xp, dtype): @@ -109,7 +113,7 @@ def test_slogdet_4(self, xp, dtype): return sign, logdet # TODO: remove skipif when MKLD-16626 is resolved - @pytest.mark.skipif(is_cpu_device(), reason="MKL bug MKLD-16626") + @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") @testing.for_dtypes("fd") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet_singular(self, xp, dtype): @@ -117,24 +121,24 @@ def test_slogdet_singular(self, xp, dtype): sign, logdet = xp.linalg.slogdet(a) 
return sign, logdet - # @testing.for_dtypes("fd") - # @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) - # def test_slogdet_singular_errstate(self, xp, dtype): - # a = xp.zeros((3, 3), dtype) - # with cupyx.errstate(linalg="raise"): - # # `cupy.linalg.slogdet` internally catches `dev_info < 0` from - # # cuSOLVER, which should not affect `dev_info > 0` cases. - # sign, logdet = xp.linalg.slogdet(a) - # return sign, logdet + # TODO: remove skipif when MKLD-16626 is resolved + @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") + @testing.for_dtypes("fd") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_slogdet_singular_errstate(self, xp, dtype): + a = xp.zeros((3, 3), dtype=dtype) + # TODO: dpnp has not errstate. Probably to be implemented later + # with cupyx.errstate(linalg="raise"): + # `cupy.linalg.slogdet` internally catches `dev_info < 0` from + # cuSOLVER, which should not affect `dev_info > 0` cases. + sign, logdet = xp.linalg.slogdet(a) + return sign, logdet @testing.for_dtypes("fd") def test_slogdet_one_dim(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2,), xp, dtype) - if xp is numpy: - with pytest.raises(numpy.linalg.LinAlgError): - xp.linalg.slogdet(a) - else: - # Replace with dpnp.linalg.LinAlgError - with pytest.raises(ValueError): - xp.linalg.slogdet(a) + with pytest.raises( + (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) + ): + xp.linalg.slogdet(a) From 99f3618c88b14b2f430f255c1edaf6b98cf54579 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 20 Dec 2023 21:15:24 +0100 Subject: [PATCH 34/55] Address remarks --- dpnp/dpnp_algo/dpnp_algo.pxd | 1 - dpnp/linalg/dpnp_iface_linalg.py | 34 ++++------- dpnp/linalg/dpnp_utils_linalg.py | 59 ++++++++++--------- .../cupy/linalg_tests/test_norms.py | 34 ++++------- 4 files changed, 55 insertions(+), 73 deletions(-) diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 63f61e7ce94f..cdeb78acb848 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -54,7 +54,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_CUMSUM_EXT DPNP_FN_DEGREES DPNP_FN_DEGREES_EXT - DPNP_FN_DET DPNP_FN_DIAG_INDICES DPNP_FN_DIAG_INDICES_EXT DPNP_FN_DIAGONAL diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 384543e268ae..45b159428716 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -158,7 +158,7 @@ def det(a): Parameters ---------- - a : (..., M, M) dpnp.ndarray + a : (..., M, M) {dpnp.ndarray, usm_ndarray} Input array to compute determinants for. Returns @@ -194,10 +194,7 @@ def det(a): """ - if not dpnp.is_supported_array_type(a): - raise TypeError( - "An array must be any of supported type, but got {}".format(type(a)) - ) + dpnp.check_supported_arrays_type(a) sign, logdet = slogdet(a) return sign * dpnp.exp(logdet) @@ -664,28 +661,23 @@ def svd(x1, full_matrices=True, compute_uv=True, hermitian=False): def slogdet(a): """ - Returns sign and logarithm of the determinant of an array. - - It calculates the natural logarithm of the determinant of a given value. + Compute the sign and (natural) logarithm of the determinant of an array. For full documentation refer to :obj:`numpy.linalg.slogdet`. - Args - ---- - a (dpnp.ndarray): The input matrix with dimension ``(..., N, N)``. + Parameters + ---------- + a : (..., M, M) {dpnp.ndarray, usm_ndarray} + Input array, has to be a square 2-D array. 
Returns ------- - out : tuple of :class:`~dpnp.ndarray`: - It returns a tuple ``(sign, logdet)``. - ``sign`` represents each sign of the determinant as a real number - ``0``, ``1`` or ``-1``. - ``logdet`` represents the natural logarithm of the absolute of the - determinant. - If the determinant is zero, ``sign`` will be ``0`` and ``logdet`` - will be ``-inf``. - The shapes of both ``sign`` and ``logdet`` are equal to - ``a.shape[:-2]``. + sign : (...) dpnp.ndarray + A number representing the sign of the determinant. For a real matrix, + this is 1, 0, or -1. For a complex matrix, this is a complex number + with absolute value 1 (i.e., it is on the unit circle), or else 0. + logabsdet : (...) dpnp.ndarray + The natural log of the absolute value of the determinant. Limitations ----------- diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 54053e625a42..527d7a63c5ce 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -38,8 +38,8 @@ "check_stacked_2d", "check_stacked_square", "dpnp_eigh", - "dpnp_solve", "dpnp_slogdet", + "dpnp_solve", ] _jobz = {"N": 0, "V": 1} @@ -176,30 +176,30 @@ def _common_inexact_type(default_dtype, *dtypes): def _lu_factor(a, res_type): - """Compute pivoted LU decomposition. + """ + Compute pivoted LU decomposition. Decompose a given batch of square matrices. Inputs and outputs are transposed. - Args: - a (dpnp.ndarray): The input matrix with dimension ``(..., N, N)``. - The dimension condition is not checked. - res_type (dpnp.dtype): float32, float64, complex64 or complex128. + Parameters + ---------- + a : (..., M, M) {dpnp.ndarray, usm_ndarray} + Input array containing the matrices to be decomposed. + res_type : dpnp.dtype + Specifies the data type of the result. + Acceptable data types are float32, float64, complex64, or complex128. - Returns: + Returns + ------- tuple: - lu_t (dpnp.ndarray): - ``L`` without its unit diagonal and ``U`` with - dimension ``(..., N, N)``. - piv (dpnp.ndarray): - 1-origin pivot indices with dimension - ``(..., N)``. - dev_info (dpnp.ndarray): - ``getrf`` or `getrf_batch` info with dimension ``(...)``. - - See Also - -------- - :obj:`scipy.linalg.lu_factor` + lu_t : (..., N, N) {dpnp.ndarray, usm_ndarray} + Combined 'L' and 'U' matrices from LU decomposition + excluding the diagonal of 'L'. + piv : (..., N) {dpnp.ndarray, usm_ndarray} + 1-origin pivot indices indicating row permutations during decomposition. + dev_info : (...) {dpnp.ndarray, usm_ndarray} + Information on `getrf` or `getrf_batch` computation success (0 for success). 
""" @@ -312,23 +312,17 @@ def _lu_factor(a, res_type): a_ht_copy_ev[i].wait() # Reshape the results back to their original shape - out_v = dpnp.array(a_vecs, order="C").reshape(orig_shape) + out_a = dpnp.array(a_vecs, order="C").reshape(orig_shape) out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) out_dev_info = dpnp.array(dev_info_vecs).reshape(orig_shape[:-2]) - return (out_v, out_ipiv, out_dev_info) + return (out_a, out_ipiv, out_dev_info) else: a_usm_arr = dpnp.get_usm_ndarray(a) # `a` must be copied because getrf destroys the input matrix a_h = dpnp.empty_like(a, order="C", dtype=res_type) - ipiv_h = dpnp.empty( - n, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue - ) - dev_info_h = dpnp.empty( - 1, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue - ) # use DPCTL tensor function to fill the сopy of the input array # from the input array @@ -336,6 +330,13 @@ def _lu_factor(a, res_type): src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue ) + ipiv_h = dpnp.empty( + n, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + dev_info_h = dpnp.empty( + 1, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + # Call the LAPACK extension function _getrf # to perform LU decomposition on the input matrix ht_lapack_ev, _ = li._getrf( @@ -665,11 +666,11 @@ def dpnp_slogdet(a): ), axis=-1, ) - if res_type.kind == "f": + if dpnp.issubdtype(res_type, dpnp.floating): non_zero += dpnp.count_nonzero(diag < 0, axis=-1) sign = (non_zero % 2) * -2 + 1 - if res_type.kind == "c": + if dpnp.issubdtype(res_type, dpnp.complexfloating): sign = sign * dpnp.prod(diag / dpnp.abs(diag), axis=-1) sign = sign.astype(res_type) diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index 05dbd4da2e0d..7198be85d9af 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -49,36 +49,28 @@ def test_det_empty_matrices(self, xp, dtype): def test_det_different_last_two_dims(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2, 3, 2), xp, dtype) - with pytest.raises( - (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) - ): + with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) def test_det_different_last_two_dims_empty_batch(self, dtype): for xp in (numpy, cupy): a = xp.empty((0, 3, 2), dtype=dtype) - with pytest.raises( - (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) - ): + with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) def test_det_one_dim(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2,), xp, dtype) - with pytest.raises( - (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) - ): + with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) @testing.for_float_dtypes(no_float16=True) def test_det_zero_dim(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((), xp, dtype) - with pytest.raises( - (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) - ): + with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) # TODO: remove skipif when MKLD-16626 is resolved @@ -91,21 +83,21 @@ def test_det_singular(self, xp, dtype): class TestSlogdet(unittest.TestCase): - @testing.for_dtypes("fd") + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet(self, xp, dtype): a = testing.shaped_arange((2, 2), xp, dtype) + 1 sign, logdet = 
xp.linalg.slogdet(a) return sign, logdet - @testing.for_dtypes("fd") + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet_3(self, xp, dtype): a = testing.shaped_arange((2, 2, 2), xp, dtype) + 1 sign, logdet = xp.linalg.slogdet(a) return sign, logdet - @testing.for_dtypes("fd") + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet_4(self, xp, dtype): a = testing.shaped_arange((2, 2, 2, 2), xp, dtype) + 1 @@ -114,7 +106,7 @@ def test_slogdet_4(self, xp, dtype): # TODO: remove skipif when MKLD-16626 is resolved @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") - @testing.for_dtypes("fd") + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet_singular(self, xp, dtype): a = xp.zeros((3, 3), dtype=dtype) @@ -123,22 +115,20 @@ def test_slogdet_singular(self, xp, dtype): # TODO: remove skipif when MKLD-16626 is resolved @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") - @testing.for_dtypes("fd") + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet_singular_errstate(self, xp, dtype): a = xp.zeros((3, 3), dtype=dtype) - # TODO: dpnp has not errstate. Probably to be implemented later + # TODO: dpnp has no errstate. Probably to be implemented later # with cupyx.errstate(linalg="raise"): # `cupy.linalg.slogdet` internally catches `dev_info < 0` from # cuSOLVER, which should not affect `dev_info > 0` cases. sign, logdet = xp.linalg.slogdet(a) return sign, logdet - @testing.for_dtypes("fd") + @testing.for_dtypes("fdFD") def test_slogdet_one_dim(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2,), xp, dtype) - with pytest.raises( - (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) - ): + with pytest.raises(xp.linalg.LinAlgError): xp.linalg.slogdet(a) From 1f9b6fa3afcbbcb956130575728497b53749484b Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 20 Dec 2023 22:31:30 +0100 Subject: [PATCH 35/55] Add _real_type func --- dpnp/linalg/dpnp_utils_linalg.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 527d7a63c5ce..56ef22c0e83b 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -45,6 +45,36 @@ _jobz = {"N": 0, "V": 1} _upper_lower = {"U": 0, "L": 1} +_real_types_map = { + "float32": "float32", # single : single + "float64": "float64", # double : double + "complex64": "float32", # csingle : csingle + "complex128": "float64", # cdouble : cdouble +} + + +def _real_type(dtype, device=None): + """ + Returns the real data type corresponding to a given dpnp data type. + + Parameters + ---------- + dtype : dpnp.dtype + The dtype for which to find the corresponding real data type. + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where an array of default floating type might be created. + + Returns + ------- + out : str + The name of the real data type. 
+ + """ + + default = dpnp.default_float_type(device) + real_type = _real_types_map.get(dtype.name, default) + return dpnp.dtype(real_type) + def check_stacked_2d(*arrays): """ @@ -629,7 +659,7 @@ def dpnp_slogdet(a): a_sycl_queue = a.sycl_queue res_type = _common_type(a) - logdet_dtype = dpnp.dtype(res_type.char.lower()) + logdet_dtype = _real_type(res_type) a_shape = a.shape shape = a_shape[:-2] From 7e12063e42e427b76fb21ec8afd7fe7b3b3dd3a8 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 20 Dec 2023 22:35:02 +0100 Subject: [PATCH 36/55] Add test_det in test_usm_type --- tests/test_usm_type.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 8e5bc500a3c3..60e330b19529 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -569,3 +569,33 @@ def test_slogdet(shape, is_empty, usm_type): assert x.usm_type == sign.usm_type assert x.usm_type == logdet.usm_type + + +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize( + "shape, is_empty", + [ + ((2, 2), False), + ((3, 2, 2), False), + ((0, 0), True), + ((0, 2, 2), True), + ], + ids=[ + "(2, 2)", + "(3, 2, 2)", + "(0, 0)", + "(0, 2, 2)", + ], +) +def test_det(shape, is_empty, usm_type): + if is_empty: + x = dp.empty(shape, dtype=dp.default_float_type(), usm_type=usm_type) + else: + count_elem = numpy.prod(shape) + x = dp.arange( + 1, count_elem + 1, dtype=dp.default_float_type(), usm_type=usm_type + ).reshape(shape) + + det = dp.linalg.det(x) + + assert x.usm_type == det.usm_type From 0e822584512a6a955c37a1c3dbfc2cd8bd65ea39 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 21 Dec 2023 12:48:56 +0100 Subject: [PATCH 37/55] Add more checks in getrf and getf_batch functions --- dpnp/backend/extensions/lapack/getrf.cpp | 44 ++++++++++++++++-- .../backend/extensions/lapack/getrf_batch.cpp | 46 +++++++++++++++++-- 2 files changed, 81 insertions(+), 9 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index 3fe7927a9a01..c09687f641e9 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -134,13 +134,41 @@ static sycl::event getrf_impl(sycl::queue exec_q, } std::pair - getrf(sycl::queue q, + getrf(sycl::queue exec_q, dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, dpctl::tensor::usm_ndarray dev_info_array, const std::int64_t n, const std::vector &depends) { + const int a_array_nd = a_array.get_ndim(); + const int ipiv_array_nd = ipiv_array.get_ndim(); + + if (a_array_nd != 2) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but a 2-dimensional array is expected."); + } + + if (ipiv_array_nd != 1) { + throw py::value_error("The array of pivot indices has ndim=" + + std::to_string(ipiv_array_nd) + + ", but a 1-dimensional array is expected."); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible( + exec_q, {a_array, ipiv_array, dev_info_array})) + { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + bool is_a_array_c_contig = a_array.is_c_contiguous(); + if (!is_a_array_c_contig) { + throw py::value_error("The input array " + "must be C-contiguous"); + } auto array_types = dpctl_td_ns::usm_ndarray_types(); int a_array_type_id = @@ -153,6 +181,14 @@ std::pair "of the input matrix."); } + auto 
ipiv_types = dpctl_td_ns::usm_ndarray_types(); + int ipiv_array_type_id = + ipiv_types.typenum_to_lookup_id(ipiv_array.get_typenum()); + + if (ipiv_array_type_id != static_cast(dpctl_td_ns::typenum_t::INT64)) { + throw py::value_error("The type of 'ipiv_array' must be int64."); + } + char *a_array_data = a_array.get_data(); const std::int64_t lda = std::max(1UL, n); @@ -164,11 +200,11 @@ std::pair reinterpret_cast(dev_info_array_data); std::vector host_task_events; - sycl::event getrf_ev = getrf_fn(q, n, a_array_data, lda, d_ipiv, d_dev_info, - host_task_events, depends); + sycl::event getrf_ev = getrf_fn(exec_q, n, a_array_data, lda, d_ipiv, + d_dev_info, host_task_events, depends); sycl::event args_ev = dpctl::utils::keep_args_alive( - q, {a_array, ipiv_array, dev_info_array}, host_task_events); + exec_q, {a_array, ipiv_array, dev_info_array}, host_task_events); return std::make_pair(args_ev, getrf_ev); } diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index b15fac74efee..86cb4c9aee93 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -147,7 +147,7 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, } std::pair - getrf_batch(sycl::queue q, + getrf_batch(sycl::queue exec_q, dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, dpctl::tensor::usm_ndarray dev_info_array, @@ -157,6 +157,34 @@ std::pair std::int64_t batch_size, const std::vector &depends) { + const int a_array_nd = a_array.get_ndim(); + const int ipiv_array_nd = ipiv_array.get_ndim(); + + if (a_array_nd < 3) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but an array with ndim >= 3 is expected."); + } + + if (ipiv_array_nd != 2) { + throw py::value_error("The array of pivot indices has ndim=" + + std::to_string(ipiv_array_nd) + + ", but a 2-dimensional array is expected."); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible( + exec_q, {a_array, ipiv_array, dev_info_array})) + { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + bool is_a_array_c_contig = a_array.is_c_contiguous(); + if (!is_a_array_c_contig) { + throw py::value_error("The input array " + "must be C-contiguous"); + } auto array_types = dpctl_td_ns::usm_ndarray_types(); int a_array_type_id = @@ -170,6 +198,14 @@ std::pair "of the input matrix."); } + auto ipiv_types = dpctl_td_ns::usm_ndarray_types(); + int ipiv_array_type_id = + ipiv_types.typenum_to_lookup_id(ipiv_array.get_typenum()); + + if (ipiv_array_type_id != static_cast(dpctl_td_ns::typenum_t::INT64)) { + throw py::value_error("The type of 'ipiv_array' must be int64."); + } + char *a_array_data = a_array.get_data(); const std::int64_t lda = std::max(1UL, n); @@ -181,12 +217,12 @@ std::pair reinterpret_cast(dev_info_array_data); std::vector host_task_events; - sycl::event getrf_batch_ev = - getrf_batch_fn(q, n, a_array_data, lda, stride_a, d_ipiv, stride_ipiv, - batch_size, d_dev_info, host_task_events, depends); + sycl::event getrf_batch_ev = getrf_batch_fn( + exec_q, n, a_array_data, lda, stride_a, d_ipiv, stride_ipiv, batch_size, + d_dev_info, host_task_events, depends); sycl::event args_ev = dpctl::utils::keep_args_alive( - q, {a_array, ipiv_array, dev_info_array}, host_task_events); + exec_q, {a_array, ipiv_array, dev_info_array}, host_task_events); return std::make_pair(args_ev, getrf_batch_ev); 
} From 3a6e5ce4622b6c1437b20341aec960434cba85c3 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 21 Dec 2023 14:11:55 +0100 Subject: [PATCH 38/55] Improve error handler in getrf_impl --- dpnp/backend/extensions/lapack/getrf.cpp | 48 +++++++++++++++++------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index c09687f641e9..b28f42621be6 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -77,6 +77,9 @@ static sycl::event getrf_impl(sycl::queue exec_q, std::stringstream error_msg; std::int64_t info = 0; + bool mkl_exception_caught = false; + bool mkl_singular_exception_caught = false; + bool sycl_exception_caught = false; sycl::event getrf_event; try { @@ -96,33 +99,52 @@ static sycl::event getrf_impl(sycl::queue exec_q, // routine for storing intermediate results. scratchpad_size, depends); } catch (mkl_lapack::exception const &e) { - error_msg - << "Unexpected MKL exception caught during getrf() call:\nreason: " - << e.what() << "\ninfo: " << e.info(); info = e.info(); + + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + mkl_exception_caught = true; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. Required size is at least " + << e.detail(); + mkl_exception_caught = true; + } + else if (info > 0) { + mkl_singular_exception_caught = true; + } + else { + error_msg << "Unexpected MKL exception caught during getrf() " + "call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + mkl_exception_caught = true; + } } catch (sycl::exception const &e) { error_msg << "Unexpected SYCL exception caught during getrf() call:\n" << e.what(); - info = -1; + sycl_exception_caught = true; } - if (info != 0) // an unexpected error occurs + if (mkl_exception_caught || + sycl_exception_caught) // an unexpected error occurs { if (scratchpad != nullptr) { sycl::free(scratchpad, exec_q); } - if (info < 0) { - throw std::runtime_error(error_msg.str()); - } + throw std::runtime_error(error_msg.str()); } - sycl::event write_info_event = exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on(getrf_event); - cgh.single_task([=]() { dev_info[0] = info; }); - }); + if (mkl_singular_exception_caught) { + sycl::event write_info_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(getrf_event); + cgh.single_task([=]() { dev_info[0] = info; }); + }); - host_task_events.push_back(write_info_event); + host_task_events.push_back(write_info_event); + } sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(getrf_event); From 6700fce216493ab4d2683919c5bf428ee7258057 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 21 Dec 2023 15:05:54 +0100 Subject: [PATCH 39/55] Improve error handler in getrf_batch_impl --- .../backend/extensions/lapack/getrf_batch.cpp | 53 +++++++++++++------ 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index 86cb4c9aee93..4bf69fdc73b7 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -86,6 +86,9 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, std::stringstream error_msg; std::int64_t info = 0; + bool mkl_exception_caught = false; + bool mkl_singular_exception_caught = false; + bool 
sycl_exception_caught = false; sycl::event getrf_batch_event; try { @@ -109,33 +112,53 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, // routine for storing intermediate results. scratchpad_size, depends); } catch (mkl_lapack::exception const &e) { - error_msg - << "Unexpected MKL exception caught during getrf() call:\nreason: " - << e.what() << "\ninfo: " << e.info(); info = e.info(); + + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + mkl_exception_caught = true; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. Required size is at least " + << e.detail(); + mkl_exception_caught = true; + } + else if (info > 0) { + mkl_singular_exception_caught = true; + } + else { + error_msg << "Unexpected MKL exception caught during getrf_batch() " + "call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + mkl_exception_caught = true; + } } catch (sycl::exception const &e) { - error_msg << "Unexpected SYCL exception caught during getrf() call:\n" - << e.what(); - info = -1; + error_msg + << "Unexpected SYCL exception caught during getrf_batch() call:\n" + << e.what(); + sycl_exception_caught = true; } - if (info != 0) // an unexpected error occurs + if (mkl_exception_caught || + sycl_exception_caught) // an unexpected error occurs { if (scratchpad != nullptr) { sycl::free(scratchpad, exec_q); } - if (info < 0) { - throw std::runtime_error(error_msg.str()); - } + throw std::runtime_error(error_msg.str()); } - sycl::event write_info_event = exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on(getrf_batch_event); - cgh.single_task([=]() { dev_info[0] = info; }); - }); + if (mkl_singular_exception_caught) { + sycl::event write_info_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(getrf_batch_event); + cgh.single_task([=]() { dev_info[0] = info; }); + }); - host_task_events.push_back(write_info_event); + host_task_events.push_back(write_info_event); + } sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(getrf_batch_event); From 644485b37f199d7935d0e7c8ebdc55f4ff0ade55 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 21 Dec 2023 15:09:11 +0100 Subject: [PATCH 40/55] dev_info is allocated as zeros --- dpnp/linalg/dpnp_utils_linalg.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 56ef22c0e83b..dcf5da31da6f 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -260,8 +260,8 @@ def _lu_factor(a, res_type): usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - dev_info_h = dpnp.empty( - 1, + dev_info_h = dpnp.zeros( + (batch_size,), dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue, @@ -319,7 +319,7 @@ def _lu_factor(a, res_type): usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - dev_info_vecs[i] = dpnp.empty( + dev_info_vecs[i] = dpnp.zeros( (1,), dtype=dpnp.int64, usm_type=a_usm_type, @@ -363,8 +363,8 @@ def _lu_factor(a, res_type): ipiv_h = dpnp.empty( n, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) - dev_info_h = dpnp.empty( - 1, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue + dev_info_h = dpnp.zeros( + (1,), dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) # Call the LAPACK extension function _getrf From 5015e15c64b9484353f6aaaac912b6bca1b1bf99 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 21 Dec 2023 
15:13:40 +0100 Subject: [PATCH 41/55] Remove skipif for singular tests --- tests/test_linalg.py | 2 -- tests/third_party/cupy/linalg_tests/test_norms.py | 6 ------ 2 files changed, 8 deletions(-) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 992f46fafc07..a061f7e65be1 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -675,8 +675,6 @@ def test_slogdet_strides(self): assert_allclose(sign_expected, sign_result) assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) - # TODO: remove skipif when MKLD-16626 is resolved - @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") @pytest.mark.parametrize( "matrix", [ diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index 7198be85d9af..bcf2c21d27f8 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -73,8 +73,6 @@ def test_det_zero_dim(self, dtype): with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) - # TODO: remove skipif when MKLD-16626 is resolved - @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_singular(self, xp, dtype): @@ -104,8 +102,6 @@ def test_slogdet_4(self, xp, dtype): sign, logdet = xp.linalg.slogdet(a) return sign, logdet - # TODO: remove skipif when MKLD-16626 is resolved - @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet_singular(self, xp, dtype): @@ -113,8 +109,6 @@ def test_slogdet_singular(self, xp, dtype): sign, logdet = xp.linalg.slogdet(a) return sign, logdet - # TODO: remove skipif when MKLD-16626 is resolved - @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_slogdet_singular_errstate(self, xp, dtype): From 68c436e7d0bb4853274678f1117f8778b7b78d1c Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 22 Dec 2023 00:18:14 +0100 Subject: [PATCH 42/55] Implement _lu_factor logic with dev_info as a python list --- dpnp/backend/extensions/lapack/getrf.cpp | 35 ++++++---------- dpnp/backend/extensions/lapack/getrf.hpp | 4 +- .../backend/extensions/lapack/getrf_batch.cpp | 35 ++++++---------- dpnp/linalg/dpnp_utils_linalg.py | 40 +++++++++---------- 4 files changed, 45 insertions(+), 69 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index b28f42621be6..dcef32bda031 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -51,7 +51,7 @@ typedef sycl::event (*getrf_impl_fn_ptr_t)(sycl::queue, char *, std::int64_t, std::int64_t *, - std::int64_t *, + py::list, std::vector &, const std::vector &); @@ -63,7 +63,7 @@ static sycl::event getrf_impl(sycl::queue exec_q, char *in_a, std::int64_t lda, std::int64_t *ipiv, - std::int64_t *dev_info, + py::list dev_info, std::vector &host_task_events, const std::vector &depends) { @@ -78,7 +78,6 @@ static sycl::event getrf_impl(sycl::queue exec_q, std::stringstream error_msg; std::int64_t info = 0; bool mkl_exception_caught = false; - bool mkl_singular_exception_caught = false; bool sycl_exception_caught = false; sycl::event getrf_event; @@ -113,7 +112,12 @@ static sycl::event getrf_impl(sycl::queue exec_q, mkl_exception_caught = true; } else if (info > 0) { - 
mkl_singular_exception_caught = true; + // Store the positive 'info' value in the first element of + // 'dev_info'. This indicates that the factorization has been + // completed, but the factor U (upper triangular matrix) is exactly + // singular. The 'info' value here is the index of the first zero + // element in the diagonal of U. + dev_info[0] = info; } else { error_msg << "Unexpected MKL exception caught during getrf() " @@ -137,15 +141,6 @@ static sycl::event getrf_impl(sycl::queue exec_q, throw std::runtime_error(error_msg.str()); } - if (mkl_singular_exception_caught) { - sycl::event write_info_event = exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on(getrf_event); - cgh.single_task([=]() { dev_info[0] = info; }); - }); - - host_task_events.push_back(write_info_event); - } - sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(getrf_event); auto ctx = exec_q.get_context(); @@ -159,7 +154,7 @@ std::pair getrf(sycl::queue exec_q, dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, - dpctl::tensor::usm_ndarray dev_info_array, + py::list dev_info, const std::int64_t n, const std::vector &depends) { @@ -179,9 +174,7 @@ std::pair } // check compatibility of execution queue and allocation queue - if (!dpctl::utils::queues_are_compatible( - exec_q, {a_array, ipiv_array, dev_info_array})) - { + if (!dpctl::utils::queues_are_compatible(exec_q, {a_array, ipiv_array})) { throw py::value_error( "Execution queue is not compatible with allocation queues"); } @@ -217,16 +210,12 @@ std::pair char *ipiv_array_data = ipiv_array.get_data(); std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data); - char *dev_info_array_data = dev_info_array.get_data(); - std::int64_t *d_dev_info = - reinterpret_cast(dev_info_array_data); - std::vector host_task_events; sycl::event getrf_ev = getrf_fn(exec_q, n, a_array_data, lda, d_ipiv, - d_dev_info, host_task_events, depends); + dev_info, host_task_events, depends); sycl::event args_ev = dpctl::utils::keep_args_alive( - exec_q, {a_array, ipiv_array, dev_info_array}, host_task_events); + exec_q, {a_array, ipiv_array}, host_task_events); return std::make_pair(args_ev, getrf_ev); } diff --git a/dpnp/backend/extensions/lapack/getrf.hpp b/dpnp/backend/extensions/lapack/getrf.hpp index 38db17bd9a03..6b9a4891bfd8 100644 --- a/dpnp/backend/extensions/lapack/getrf.hpp +++ b/dpnp/backend/extensions/lapack/getrf.hpp @@ -42,7 +42,7 @@ extern std::pair getrf(sycl::queue exec_q, dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, - dpctl::tensor::usm_ndarray dev_info_array, + py::list dev_info, const std::int64_t n, const std::vector &depends = {}); @@ -50,7 +50,7 @@ extern std::pair getrf_batch(sycl::queue exec_q, dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, - dpctl::tensor::usm_ndarray dev_info_array, + py::list dev_info, std::int64_t n, std::int64_t stride_a, std::int64_t stride_ipiv, diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index 4bf69fdc73b7..22ee25f90745 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -55,7 +55,7 @@ typedef sycl::event (*getrf_batch_impl_fn_ptr_t)( std::int64_t *, std::int64_t, std::int64_t, - std::int64_t *, + py::list, std::vector &, const std::vector &); @@ -71,7 +71,7 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - 
std::int64_t *dev_info, + py::list dev_info, std::vector &host_task_events, const std::vector &depends) { @@ -87,7 +87,6 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, std::stringstream error_msg; std::int64_t info = 0; bool mkl_exception_caught = false; - bool mkl_singular_exception_caught = false; bool sycl_exception_caught = false; sycl::event getrf_batch_event; @@ -126,7 +125,12 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, mkl_exception_caught = true; } else if (info > 0) { - mkl_singular_exception_caught = true; + // Store the positive 'info' value in the first element of + // 'dev_info'. This indicates that the factorization has been + // completed, but the factor U (upper triangular matrix) is exactly + // singular. The 'info' value here is the index of the first zero + // element in the diagonal of U. + dev_info[0] = info; } else { error_msg << "Unexpected MKL exception caught during getrf_batch() " @@ -151,15 +155,6 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, throw std::runtime_error(error_msg.str()); } - if (mkl_singular_exception_caught) { - sycl::event write_info_event = exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on(getrf_batch_event); - cgh.single_task([=]() { dev_info[0] = info; }); - }); - - host_task_events.push_back(write_info_event); - } - sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(getrf_batch_event); auto ctx = exec_q.get_context(); @@ -173,7 +168,7 @@ std::pair getrf_batch(sycl::queue exec_q, dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, - dpctl::tensor::usm_ndarray dev_info_array, + py::list dev_info, std::int64_t n, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -196,9 +191,7 @@ std::pair } // check compatibility of execution queue and allocation queue - if (!dpctl::utils::queues_are_compatible( - exec_q, {a_array, ipiv_array, dev_info_array})) - { + if (!dpctl::utils::queues_are_compatible(exec_q, {a_array, ipiv_array})) { throw py::value_error( "Execution queue is not compatible with allocation queues"); } @@ -235,17 +228,13 @@ std::pair char *ipiv_array_data = ipiv_array.get_data(); std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data); - char *dev_info_array_data = dev_info_array.get_data(); - std::int64_t *d_dev_info = - reinterpret_cast(dev_info_array_data); - std::vector host_task_events; sycl::event getrf_batch_ev = getrf_batch_fn( exec_q, n, a_array_data, lda, stride_a, d_ipiv, stride_ipiv, batch_size, - d_dev_info, host_task_events, depends); + dev_info, host_task_events, depends); sycl::event args_ev = dpctl::utils::keep_args_alive( - exec_q, {a_array, ipiv_array, dev_info_array}, host_task_events); + exec_q, {a_array, ipiv_array}, host_task_events); return std::make_pair(args_ev, getrf_batch_ev); } diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index dcf5da31da6f..655872a291fb 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -260,12 +260,7 @@ def _lu_factor(a, res_type): usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - dev_info_h = dpnp.zeros( - (batch_size,), - dtype=dpnp.int64, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) + dev_info_h = [0] * batch_size a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue @@ -280,7 +275,7 @@ def _lu_factor(a, res_type): a_sycl_queue, a_h.get_array(), ipiv_h.get_array(), - dev_info_h.get_array(), + dev_info_h, n, a_stride, ipiv_stride, @@ 
-291,7 +286,11 @@ def _lu_factor(a, res_type): ht_lapack_ev.wait() a_ht_copy_ev.wait() - return (a_h, ipiv_h, dev_info_h) + dev_info_array = dpnp.array( + dev_info_h, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + + return (a_h, ipiv_h, dev_info_array) else: # Initialize lists for storing arrays and events for each batch @@ -319,12 +318,7 @@ def _lu_factor(a, res_type): usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) - dev_info_vecs[i] = dpnp.zeros( - (1,), - dtype=dpnp.int64, - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) + dev_info_vecs[i] = [0] # Call the LAPACK extension function _getrf # to perform LU decomposition on each batch in 'a_vecs[i]' @@ -332,7 +326,7 @@ def _lu_factor(a, res_type): a_sycl_queue, a_vecs[i].get_array(), ipiv_vecs[i].get_array(), - dev_info_vecs[i].get_array(), + dev_info_vecs[i], n, [a_copy_ev], ) @@ -344,7 +338,9 @@ def _lu_factor(a, res_type): # Reshape the results back to their original shape out_a = dpnp.array(a_vecs, order="C").reshape(orig_shape) out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) - out_dev_info = dpnp.array(dev_info_vecs).reshape(orig_shape[:-2]) + out_dev_info = dpnp.array( + dev_info_vecs, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ).reshape(orig_shape[:-2]) return (out_a, out_ipiv, out_dev_info) @@ -363,9 +359,7 @@ def _lu_factor(a, res_type): ipiv_h = dpnp.empty( n, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) - dev_info_h = dpnp.zeros( - (1,), dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue - ) + dev_info_h = [0] # Call the LAPACK extension function _getrf # to perform LU decomposition on the input matrix @@ -373,7 +367,7 @@ def _lu_factor(a, res_type): a_sycl_queue, a_h.get_array(), ipiv_h.get_array(), - dev_info_h.get_array(), + dev_info_h, n, [a_copy_ev], ) @@ -381,10 +375,14 @@ def _lu_factor(a, res_type): ht_lapack_ev.wait() a_ht_copy_ev.wait() + dev_info_array = dpnp.array( + dev_info_h, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + # Return a tuple containing the factorized matrix 'a_h', # pivot indices 'ipiv_h' # and the status 'dev_info_h' from the LAPACK getrf call - return (a_h, ipiv_h, dev_info_h) + return (a_h, ipiv_h, dev_info_array) def dpnp_eigh(a, UPLO): From 030a083acef3cb5f81d7b0b30007230a8937192e Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 22 Dec 2023 00:51:01 +0100 Subject: [PATCH 43/55] Update getrf_rf error handler with mkl_lapack::batch_error --- .../backend/extensions/lapack/getrf_batch.cpp | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index 22ee25f90745..20a03a085074 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -110,33 +110,35 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, scratchpad, // Pointer to scratchpad memory to be used by MKL // routine for storing intermediate results. 
scratchpad_size, depends); + } catch (mkl_lapack::batch_error const &be) { + // Get the indices of matrices within the batch that encountered an + // error + auto error_matrices_ids = be.ids(); + // Get the indices of the first zero diagonal elements of these matrices + auto error_info = be.exceptions(); + + for (size_t i = 0; i < error_matrices_ids.size(); ++i) { + // Assign the index of the first zero diagonal element in each + // error matrix to the corresponding index in 'dev_info' + dev_info[error_matrices_ids[i]] = error_info[i]; + } } catch (mkl_lapack::exception const &e) { + mkl_exception_caught = true; info = e.info(); if (info < 0) { error_msg << "Parameter number " << -info << " had an illegal value."; - mkl_exception_caught = true; } else if (info == scratchpad_size && e.detail() != 0) { error_msg << "Insufficient scratchpad size. Required size is at least " << e.detail(); - mkl_exception_caught = true; - } - else if (info > 0) { - // Store the positive 'info' value in the first element of - // 'dev_info'. This indicates that the factorization has been - // completed, but the factor U (upper triangular matrix) is exactly - // singular. The 'info' value here is the index of the first zero - // element in the diagonal of U. - dev_info[0] = info; } else { error_msg << "Unexpected MKL exception caught during getrf_batch() " "call:\nreason: " << e.what() << "\ninfo: " << e.info(); - mkl_exception_caught = true; } } catch (sycl::exception const &e) { error_msg From 579b4e590322f0359f68257a095afff0553ab7b5 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 22 Dec 2023 01:08:51 +0100 Subject: [PATCH 44/55] Remove passing n parameter to _getrf --- dpnp/backend/extensions/lapack/getrf.cpp | 3 ++- dpnp/backend/extensions/lapack/getrf.hpp | 1 - dpnp/backend/extensions/lapack/lapack_py.cpp | 3 +-- dpnp/linalg/dpnp_utils_linalg.py | 2 -- 4 files changed, 3 insertions(+), 6 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index dcef32bda031..8d0e683df07f 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -155,7 +155,6 @@ std::pair dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, py::list dev_info, - const std::int64_t n, const std::vector &depends) { const int a_array_nd = a_array.get_ndim(); @@ -204,6 +203,8 @@ std::pair throw py::value_error("The type of 'ipiv_array' must be int64."); } + const std::int64_t n = a_array.get_shape_raw()[0]; + char *a_array_data = a_array.get_data(); const std::int64_t lda = std::max(1UL, n); diff --git a/dpnp/backend/extensions/lapack/getrf.hpp b/dpnp/backend/extensions/lapack/getrf.hpp index 6b9a4891bfd8..d8ea425f6804 100644 --- a/dpnp/backend/extensions/lapack/getrf.hpp +++ b/dpnp/backend/extensions/lapack/getrf.hpp @@ -43,7 +43,6 @@ extern std::pair dpctl::tensor::usm_ndarray a_array, dpctl::tensor::usm_ndarray ipiv_array, py::list dev_info, - const std::int64_t n, const std::vector &depends = {}); extern std::pair diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp index 2b5ad61678f6..3c6ba80575da 100644 --- a/dpnp/backend/extensions/lapack/lapack_py.cpp +++ b/dpnp/backend/extensions/lapack/lapack_py.cpp @@ -75,8 +75,7 @@ PYBIND11_MODULE(_lapack_impl, m) "Call `getrf` from OneMKL LAPACK library to return " "the LU factorization of a general n x n matrix", py::arg("sycl_queue"), py::arg("a_array"), py::arg("ipiv_array"), - py::arg("dev_info_array"), 
py::arg("n"), - py::arg("depends") = py::list()); + py::arg("dev_info"), py::arg("depends") = py::list()); m.def("_getrf_batch", &lapack_ext::getrf_batch, "Call `getrf_batch` from OneMKL LAPACK library to return " diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 655872a291fb..fdeabfa38ed4 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -327,7 +327,6 @@ def _lu_factor(a, res_type): a_vecs[i].get_array(), ipiv_vecs[i].get_array(), dev_info_vecs[i], - n, [a_copy_ev], ) @@ -368,7 +367,6 @@ def _lu_factor(a, res_type): a_h.get_array(), ipiv_h.get_array(), dev_info_h, - n, [a_copy_ev], ) From acd04b7ed85f8c365fb03fca1c30c1fb298d5ccc Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 22 Dec 2023 01:20:35 +0100 Subject: [PATCH 45/55] Add a new test_slogdet_singular_matrix_3D test --- tests/test_linalg.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index a061f7e65be1..7bf21bd5a75b 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -704,6 +704,22 @@ def test_slogdet_singular_matrix(self, matrix): assert_allclose(sign_expected, sign_result) assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + # TODO: remove skipif when MKLD-16626 is resolved + # _getrf_batch does not raise an error with singular matrices. + # Skip running on cpu because dpnp uses _getrf_batch only on cpu. + @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") + def test_slogdet_singular_matrix_3D(self): + a_np = numpy.array( + [[[1, 2], [3, 4]], [[1, 2], [1, 2]], [[1, 3], [3, 1]]] + ) + a_dp = inp.array(a_np) + + sign_expected, logdet_expected = numpy.linalg.slogdet(a_np) + sign_result, logdet_result = inp.linalg.slogdet(a_dp) + + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + def test_solve_errors(self): a_dp = inp.array([[1, 2], [3, 5]], dtype="float32") From f0cc4d3b808c2a4344d93873e84be83a3f3cb377 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 22 Dec 2023 20:04:08 +0100 Subject: [PATCH 46/55] Update tests for dpnp.linalg.det --- tests/test_linalg.py | 136 ++++++++++++++++++++++++++++++++----------- 1 file changed, 102 insertions(+), 34 deletions(-) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 7bf21bd5a75b..8c1085e64684 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -105,46 +105,114 @@ def test_cond(arr, p): assert_array_equal(expected, result) -@pytest.mark.skipif(is_cpu_device(), reason="MKL bug MKLD-16626") -@pytest.mark.parametrize( - "array", - [ - [[0, 0], [0, 0]], - [[1, 2], [1, 2]], - [[1, 2], [3, 4]], - [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], +class TestDet: + @pytest.mark.parametrize( + "array", [ - [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], - [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], + [[1, 2], [3, 4]], + [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], + [ + [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], + [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], + ], ], - ], - ids=[ - "[[0, 0], [0, 0]]", - "[[1, 2], [1, 2]]", - "[[1, 2], [3, 4]]", - "[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]", - "[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]", - ], -) -def test_det(array): - a = numpy.array(array) - ia = inp.array(a) - result = inp.linalg.det(ia) - expected = numpy.linalg.det(a) - assert_allclose(expected, result) + ids=[ + "2D_array", + "3D_array", + "4D_array", + ], + ) 
+ def test_det(self, array): + a = numpy.array(array) + ia = inp.array(a) + result = inp.linalg.det(ia) + expected = numpy.linalg.det(a) + assert_allclose(expected, result) + + def test_det_strides(self): + a_np = numpy.array( + [ + [2, 3, 1, 4, 5], + [5, 6, 7, 8, 9], + [9, 7, 7, 2, 3], + [1, 4, 5, 1, 8], + [8, 9, 8, 5, 3], + ] + ) + a_dp = inp.array(a_np) -def test_det_empty(): - a = numpy.empty((0, 0, 2, 2), dtype=numpy.float32) - ia = inp.array(a) + # positive strides + expected = numpy.linalg.det(a_np[::2, ::2]) + result = inp.linalg.det(a_dp[::2, ::2]) + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) - np_det = numpy.linalg.det(a) - dpnp_det = inp.linalg.det(ia) + # negative strides + expected = numpy.linalg.det(a_np[::-2, ::-2]) + result = inp.linalg.det(a_dp[::-2, ::-2]) + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) + + def test_det_empty(self): + a = numpy.empty((0, 0, 2, 2), dtype=numpy.float32) + ia = inp.array(a) - assert dpnp_det.dtype == np_det.dtype - assert dpnp_det.shape == np_det.shape + np_det = numpy.linalg.det(a) + dpnp_det = inp.linalg.det(ia) - assert_allclose(np_det, dpnp_det) + assert dpnp_det.dtype == np_det.dtype + assert dpnp_det.shape == np_det.shape + + assert_allclose(np_det, dpnp_det) + + @pytest.mark.parametrize( + "matrix", + [ + [[1, 2], [2, 4]], + [[0, 0], [0, 0]], + [[1, 1], [1, 1]], + [[2, 4], [1, 2]], + [[1, 2], [0, 0]], + [[1, 0], [2, 0]], + ], + ids=[ + "Linearly dependent rows", + "Zero matrix", + "Identical rows", + "Linearly dependent columns", + "Zero row", + "Zero column", + ], + ) + def test_det_singular_matrix(self, matrix): + a_np = numpy.array(matrix, dtype="float32") + a_dp = inp.array(a_np) + + expected = numpy.linalg.slogdet(a_np) + result = inp.linalg.slogdet(a_dp) + + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) + + # TODO: remove skipif when MKLD-16626 is resolved + # _getrf_batch does not raise an error with singular matrices. + # Skip running on cpu because dpnp uses _getrf_batch only on cpu. 
+ @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") + def test_det_singular_matrix_3D(self): + a_np = numpy.array( + [[[1, 2], [3, 4]], [[1, 2], [1, 2]], [[1, 3], [3, 1]]] + ) + a_dp = inp.array(a_np) + + expected = numpy.linalg.det(a_np) + result = inp.linalg.det(a_dp) + + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) + + def test_det_errors(self): + a_dp = inp.array([[1, 2], [3, 5]], dtype="float32") + + # unsupported type + a_np = inp.asnumpy(a_dp) + assert_raises(TypeError, inp.linalg.det, a_np) @pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @@ -720,7 +788,7 @@ def test_slogdet_singular_matrix_3D(self): assert_allclose(sign_expected, sign_result) assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) - def test_solve_errors(self): + def test_slogdet_errors(self): a_dp = inp.array([[1, 2], [3, 5]], dtype="float32") # unsupported type From c9b7c3bce857337ea8e02a667f5afbd25b6320fa Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 8 Jan 2024 12:41:53 +0100 Subject: [PATCH 47/55] Use is_exception_caught flag in getrf and getrf_batch error handler --- dpnp/backend/extensions/lapack/getrf.cpp | 13 +++++-------- dpnp/backend/extensions/lapack/getrf_batch.cpp | 10 ++++------ 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index 8d0e683df07f..13f759e4e4d8 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -77,8 +77,7 @@ static sycl::event getrf_impl(sycl::queue exec_q, std::stringstream error_msg; std::int64_t info = 0; - bool mkl_exception_caught = false; - bool sycl_exception_caught = false; + bool is_exception_caught = false; sycl::event getrf_event; try { @@ -98,18 +97,17 @@ static sycl::event getrf_impl(sycl::queue exec_q, // routine for storing intermediate results. scratchpad_size, depends); } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; info = e.info(); if (info < 0) { error_msg << "Parameter number " << -info << " had an illegal value."; - mkl_exception_caught = true; } else if (info == scratchpad_size && e.detail() != 0) { error_msg << "Insufficient scratchpad size. Required size is at least " << e.detail(); - mkl_exception_caught = true; } else if (info > 0) { // Store the positive 'info' value in the first element of @@ -117,22 +115,21 @@ static sycl::event getrf_impl(sycl::queue exec_q, // completed, but the factor U (upper triangular matrix) is exactly // singular. The 'info' value here is the index of the first zero // element in the diagonal of U. 
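+            // Note: a positive 'info' signals a completed factorization with a
+            // singular U, not a failure; the exception flag is therefore cleared
+            // below and the condition is reported through 'dev_info' instead of
+            // throwing a runtime_error.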
+ is_exception_caught = false; dev_info[0] = info; } else { error_msg << "Unexpected MKL exception caught during getrf() " "call:\nreason: " << e.what() << "\ninfo: " << e.info(); - mkl_exception_caught = true; } } catch (sycl::exception const &e) { + is_exception_caught = true; error_msg << "Unexpected SYCL exception caught during getrf() call:\n" << e.what(); - sycl_exception_caught = true; } - if (mkl_exception_caught || - sycl_exception_caught) // an unexpected error occurs + if (is_exception_caught) // an unexpected error occurs { if (scratchpad != nullptr) { sycl::free(scratchpad, exec_q); diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index 20a03a085074..4bfbca06a11a 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -86,8 +86,7 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, std::stringstream error_msg; std::int64_t info = 0; - bool mkl_exception_caught = false; - bool sycl_exception_caught = false; + bool is_exception_caught = false; sycl::event getrf_batch_event; try { @@ -123,7 +122,7 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, dev_info[error_matrices_ids[i]] = error_info[i]; } } catch (mkl_lapack::exception const &e) { - mkl_exception_caught = true; + is_exception_caught = true; info = e.info(); if (info < 0) { @@ -141,14 +140,13 @@ static sycl::event getrf_batch_impl(sycl::queue exec_q, << e.what() << "\ninfo: " << e.info(); } } catch (sycl::exception const &e) { + is_exception_caught = true; error_msg << "Unexpected SYCL exception caught during getrf_batch() call:\n" << e.what(); - sycl_exception_caught = true; } - if (mkl_exception_caught || - sycl_exception_caught) // an unexpected error occurs + if (is_exception_caught) // an unexpected error occurs { if (scratchpad != nullptr) { sycl::free(scratchpad, exec_q); From a3873cd78a25416468a13e2af6a6e926fbbe45bc Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 8 Jan 2024 12:53:14 +0100 Subject: [PATCH 48/55] Update gesv error handler --- dpnp/backend/extensions/lapack/gesv.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/dpnp/backend/extensions/lapack/gesv.cpp b/dpnp/backend/extensions/lapack/gesv.cpp index 72e5aa806714..781356a2f52a 100644 --- a/dpnp/backend/extensions/lapack/gesv.cpp +++ b/dpnp/backend/extensions/lapack/gesv.cpp @@ -84,7 +84,7 @@ static sycl::event gesv_impl(sycl::queue exec_q, std::stringstream error_msg; std::int64_t info = 0; - bool sycl_exception_caught = false; + bool is_exception_caught = false; sycl::event gesv_event; try { @@ -106,12 +106,18 @@ static sycl::event gesv_impl(sycl::queue exec_q, // routine for storing intermediate results. scratchpad_size, depends); } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; info = e.info(); if (info < 0) { error_msg << "Parameter number " << -info << " had an illegal value."; } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. Required size is at least " + << e.detail(); + } else if (info > 0) { T host_U; exec_q.memcpy(&host_U, &a[(info - 1) * lda + info - 1], sizeof(T)) @@ -131,23 +137,18 @@ static sycl::event gesv_impl(sycl::queue exec_q, << e.what() << "\ninfo: " << e.info(); } } - else if (info == scratchpad_size && e.detail() != 0) { - error_msg - << "Insufficient scratchpad size. 
Required size is at least " - << e.detail(); - } else { error_msg << "Unexpected MKL exception caught during gesv() " "call:\nreason: " << e.what() << "\ninfo: " << e.info(); } } catch (sycl::exception const &e) { + is_exception_caught = true; error_msg << "Unexpected SYCL exception caught during gesv() call:\n" << e.what(); - sycl_exception_caught = true; } - if (info != 0 || sycl_exception_caught) // an unexpected error occurs + if (is_exception_caught) // an unexpected error occurs { if (scratchpad != nullptr) { sycl::free(scratchpad, exec_q); From 9652797a23810c5db4d8609b4756622ff00dd83a Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 8 Jan 2024 15:29:46 +0100 Subject: [PATCH 49/55] Reshape results after calling getrf_batch --- dpnp/linalg/dpnp_utils_linalg.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index fdeabfa38ed4..878b4fc480bb 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -290,6 +290,11 @@ def _lu_factor(a, res_type): dev_info_h, usm_type=a_usm_type, sycl_queue=a_sycl_queue ) + # Reshape the results back to their original shape + a_h = a_h.reshape(orig_shape) + ipiv_h = ipiv_h.reshape(orig_shape[:-1]) + dev_info_array = dev_info_array.reshape(orig_shape[:-2]) + return (a_h, ipiv_h, dev_info_array) else: From cef4690aa9c87df868435dd5dafbef3fb05507cd Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 9 Jan 2024 13:11:39 +0100 Subject: [PATCH 50/55] Add a new dpnp.linalg.det impl and refresh dpnp_utils_linalg --- dpnp/linalg/dpnp_iface_linalg.py | 6 +- dpnp/linalg/dpnp_utils_linalg.py | 155 ++++++++++++++---- .../cupy/linalg_tests/test_norms.py | 1 - 3 files changed, 123 insertions(+), 39 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 45b159428716..ab22482a81b6 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -50,6 +50,7 @@ from .dpnp_utils_linalg import ( check_stacked_2d, check_stacked_square, + dpnp_det, dpnp_eigh, dpnp_slogdet, dpnp_solve, @@ -195,9 +196,10 @@ def det(a): """ dpnp.check_supported_arrays_type(a) + check_stacked_2d(a) + check_stacked_square(a) - sign, logdet = slogdet(a) - return sign * dpnp.exp(logdet) + return dpnp_det(a) def eig(x1): diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 878b4fc480bb..86227159c187 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -37,6 +37,7 @@ __all__ = [ "check_stacked_2d", "check_stacked_square", + "dpnp_det", "dpnp_eigh", "dpnp_slogdet", "dpnp_solve", @@ -53,6 +54,50 @@ } +def _calculate_determinant_sign(ipiv, diag, res_type, n): + """ + Calculate the sign of the determinant based on row exchanges and diagonal values. + + Parameters + ----------- + ipiv : {dpnp.ndarray, usm_ndarray} + The pivot indices from LU decomposition. + diag : {dpnp.ndarray, usm_ndarray} + The diagonal elements of the LU decomposition matrix. + res_type : dpnp.dtype + The common data type for linalg operations. + n : int + The size of the last two dimensions of the array. + + Returns + ------- + sign : {dpnp_array, usm_ndarray} + The sign of the determinant. + + """ + + # Checks for row exchanges in LU decomposition affecting determinant sign. + ipiv_diff = ipiv != dpnp.arange( + 1, n + 1, usm_type=ipiv.usm_type, sycl_queue=ipiv.sycl_queue + ) + + # Counts row exchanges from 'ipiv_diff'. 
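+    # Each position where the 1-based pivot index differs from its row number
+    # corresponds to a row swap performed by getrf; an odd number of swaps
+    # flips the sign of the determinant.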
+ non_zero = dpnp.count_nonzero(ipiv_diff, axis=-1) + + # For floating types, adds count of negative diagonal elements + # to determine determinant sign. + if dpnp.issubdtype(res_type, dpnp.floating): + non_zero += dpnp.count_nonzero(diag < 0, axis=-1) + + sign = (non_zero % 2) * -2 + 1 + + # For complex types, compute sign from the phase of diagonal elements. + if dpnp.issubdtype(res_type, dpnp.complexfloating): + sign = sign * dpnp.prod(diag / dpnp.abs(diag), axis=-1) + + return sign.astype(res_type) + + def _real_type(dtype, device=None): """ Returns the real data type corresponding to a given dpnp data type. @@ -84,7 +129,7 @@ def check_stacked_2d(*arrays): Parameters ---------- - arrays : {dpnp_array, usm_ndarray} + arrays : {dpnp.ndarray, usm_ndarray} A sequence of input arrays to check for dimensionality. Returns @@ -123,7 +168,7 @@ def check_stacked_square(*arrays): Parameters ---------- - arrays : {dpnp_array, usm_ndarray} + arrays : {dpnp.ndarray, usm_ndarray} A sequence of input arrays to check for square matrix shape. Returns @@ -148,8 +193,6 @@ def check_stacked_square(*arrays): def _common_type(*arrays): """ - _common_type(*arrays) - Common type for linear algebra operations. This function determines the common data type for linalg operations. @@ -160,12 +203,15 @@ def _common_type(*arrays): - The default floating-point data type is determined by the capabilities of the device on which `arrays` are created, as indicated by `dpnp.default_float_type()`. - Args: - *arrays (dpnp.ndarray): Input arrays. - - Returns: - dtype_common (dtype): The common data type for linalg operations. + Parameters + ---------- + arrays : {dpnp.ndarray, usm_ndarray} + A sequence of input arrays. + Returns + ------- + dtype_common : dpnp.dtype + The common data type for linalg operations. This returned value is applicable both as the precision to be used in linalg calls and as the dtype of (possibly complex) output(s). @@ -181,24 +227,27 @@ def _common_type(*arrays): def _common_inexact_type(default_dtype, *dtypes): """ - _common_inexact_type(default_dtype, *dtypes) - Determines the common 'inexact' data type for linear algebra operations. This function selects an 'inexact' data type appropriate for the device's capabilities. It defaults to `default_dtype` when provided types are not 'inexact'. - Args: - default_dtype: The default data type. This is determined by the capabilities of + Parameters + ---------- + default_dtype : dpnp.dtype + The default data type. This is determined by the capabilities of the device and is used when none of the provided types are 'inexact'. *dtypes: A variable number of data types to be evaluated to find the common 'inexact' type. - Returns: - dpnp.result_type (dtype) : The resultant 'inexact' data type for linalg operations, + Returns + ------- + dpnp.result_type : dpnp.dtype + The resultant 'inexact' data type for linalg operations, ensuring computational compatibility. """ + inexact_dtypes = [ dt if issubdtype(dt, dpnp.inexact) else default_dtype for dt in dtypes ] @@ -214,15 +263,15 @@ def _lu_factor(a, res_type): Parameters ---------- - a : (..., M, M) {dpnp.ndarray, usm_ndarray} - Input array containing the matrices to be decomposed. - res_type : dpnp.dtype - Specifies the data type of the result. - Acceptable data types are float32, float64, complex64, or complex128. + a : (..., M, M) {dpnp.ndarray, usm_ndarray} + Input array containing the matrices to be decomposed. + res_type : dpnp.dtype + Specifies the data type of the result. 
+ Acceptable data types are float32, float64, complex64, or complex128. Returns ------- - tuple: + tuple: lu_t : (..., N, N) {dpnp.ndarray, usm_ndarray} Combined 'L' and 'U' matrices from LU decomposition excluding the diagonal of 'L'. @@ -388,6 +437,54 @@ def _lu_factor(a, res_type): return (a_h, ipiv_h, dev_info_array) +def dpnp_det(a): + """ + dpnp_det(a) + + Returns the determinant of `a` array. + + """ + + a_usm_type = a.usm_type + a_sycl_queue = a.sycl_queue + + res_type = _common_type(a) + det_dtype = _real_type(res_type) + + a_shape = a.shape + shape = a_shape[:-2] + n = a_shape[-2] + + if a.size == 0: + # empty batch (result is empty, too) or empty matrices det([[]]) == 1 + det = dpnp.ones( + shape, + dtype=det_dtype, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + return det + + lu, ipiv, dev_info = _lu_factor(a, res_type) + + # Transposing 'lu' to swap the last two axes for compatibility + # with 'dpnp.diagonal' as it does not support 'axis1' and 'axis2' arguments. + # TODO: Replace with 'dpnp.diagonal(lu, axis1=-2, axis2=-1)' when supported. + lu_transposed = lu.transpose(-2, -1, *range(lu.ndim - 2)) + diag = dpnp.diagonal(lu_transposed) + + det = dpnp.prod(diag, axis=-1) + + sign = _calculate_determinant_sign(ipiv, diag, res_type, n) + + det = sign * det + det = det.astype(det_dtype, copy=False) + singular = dpnp.array([dev_info > 0]) + det = dpnp.where(singular, res_type.type(0), det) + + return det.reshape(shape) + + def dpnp_eigh(a, UPLO): """ dpnp_eigh(a, UPLO) @@ -689,22 +786,8 @@ def dpnp_slogdet(a): logdet = dpnp.log(dpnp.abs(diag)).sum(axis=-1) - # ipiv is 1-origin - non_zero = dpnp.count_nonzero( - ipiv - != dpnp.arange( - 1, n + 1, usm_type=ipiv.usm_type, sycl_queue=ipiv.sycl_queue - ), - axis=-1, - ) - if dpnp.issubdtype(res_type, dpnp.floating): - non_zero += dpnp.count_nonzero(diag < 0, axis=-1) - - sign = (non_zero % 2) * -2 + 1 - if dpnp.issubdtype(res_type, dpnp.complexfloating): - sign = sign * dpnp.prod(diag / dpnp.abs(diag), axis=-1) + sign = _calculate_determinant_sign(ipiv, diag, res_type, n) - sign = sign.astype(res_type) logdet = logdet.astype(logdet_dtype, copy=False) singular = dpnp.array([dev_info > 0]) return ( diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index bcf2c21d27f8..41d4830e84d9 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -4,7 +4,6 @@ import pytest import dpnp as cupy -from tests.helper import is_cpu_device from tests.third_party.cupy import testing From 67681bff019c8a8e53b09b34cb31c57a084c7f85 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 9 Jan 2024 13:20:04 +0100 Subject: [PATCH 51/55] Remove Limitations from dpnp_det and dpnp_slogdet docstings --- dpnp/linalg/dpnp_iface_linalg.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index ab22482a81b6..b7a56c4c7db2 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -167,11 +167,6 @@ def det(a): det : (...) dpnp.ndarray Determinant of `a`. - Limitations - ----------- - Parameter `a` is supported as :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. - Input array data types are limited by supported DPNP :ref:`Data types`. - See Also -------- :obj:`dpnp.linalg.slogdet` : Returns sign and logarithm of the determinant of an array. 
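For illustration, a minimal usage sketch of the det() API documented in this hunk
(not part of the patch; assumes a dpnp build that includes this series):

    import dpnp as dp

    a = dp.array([[1.0, 2.0], [3.0, 4.0]])
    print(dp.linalg.det(a))    # -2.0, up to floating-point rounding

    # Batched input: the determinant is computed over the last two axes.
    b = dp.array([[[1.0, 2.0], [3.0, 4.0]], [[2.0, 0.0], [0.0, 2.0]]])
    print(dp.linalg.det(b))    # [-2.  4.]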
@@ -674,17 +669,12 @@ def slogdet(a): Returns ------- - sign : (...) dpnp.ndarray - A number representing the sign of the determinant. For a real matrix, - this is 1, 0, or -1. For a complex matrix, this is a complex number - with absolute value 1 (i.e., it is on the unit circle), or else 0. - logabsdet : (...) dpnp.ndarray - The natural log of the absolute value of the determinant. - - Limitations - ----------- - Parameter `a` is supported as :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. - Input array data types are limited by supported DPNP :ref:`Data types`. + sign : (...) dpnp.ndarray + A number representing the sign of the determinant. For a real matrix, + this is 1, 0, or -1. For a complex matrix, this is a complex number + with absolute value 1 (i.e., it is on the unit circle), or else 0. + logabsdet : (...) dpnp.ndarray + The natural log of the absolute value of the determinant. See Also -------- From ed71f6b2b654142de6e1cf8b66fd657ae6c8f976 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 10 Jan 2024 12:41:15 +0100 Subject: [PATCH 52/55] Address remarks --- dpnp/backend/extensions/lapack/getrf.cpp | 11 +++++++++++ dpnp/backend/extensions/lapack/getrf_batch.cpp | 11 +++++++++++ dpnp/linalg/dpnp_utils_linalg.py | 12 +++++++++--- tests/test_linalg.py | 5 +++-- 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index 13f759e4e4d8..faebb9f7d42f 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -175,11 +175,22 @@ std::pair "Execution queue is not compatible with allocation queues"); } + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(a_array, ipiv_array)) { + throw py::value_error("The input array and the array of pivot indices " + "are overlapping segments of memory"); + } + bool is_a_array_c_contig = a_array.is_c_contiguous(); + bool is_ipiv_array_c_contig = ipiv_array.is_c_contiguous(); if (!is_a_array_c_contig) { throw py::value_error("The input array " "must be C-contiguous"); } + if (!is_ipiv_array_c_contig) { + throw py::value_error("The array of pivot indices " + "must be C-contiguous"); + } auto array_types = dpctl_td_ns::usm_ndarray_types(); int a_array_type_id = diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index 4bfbca06a11a..0944d7437aed 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -196,11 +196,22 @@ std::pair "Execution queue is not compatible with allocation queues"); } + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(a_array, ipiv_array)) { + throw py::value_error("The input array and the array of pivot indices " + "are overlapping segments of memory"); + } + bool is_a_array_c_contig = a_array.is_c_contiguous(); + bool is_ipiv_array_c_contig = ipiv_array.is_c_contiguous(); if (!is_a_array_c_contig) { throw py::value_error("The input array " "must be C-contiguous"); } + if (!is_ipiv_array_c_contig) { + throw py::value_error("The array of pivot indices " + "must be C-contiguous"); + } auto array_types = dpctl_td_ns::usm_ndarray_types(); int a_array_type_id = diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 86227159c187..ba402e8ccb7a 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -306,6 +306,7 @@ def _lu_factor(a, res_type): 
ipiv_h = dpnp.empty( (batch_size, n), dtype=dpnp.int64, + order="C", usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) @@ -369,6 +370,7 @@ def _lu_factor(a, res_type): ipiv_vecs[i] = dpnp.empty( (n,), dtype=dpnp.int64, + order="C", usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) @@ -410,7 +412,11 @@ def _lu_factor(a, res_type): ) ipiv_h = dpnp.empty( - n, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue + n, + dtype=dpnp.int64, + order="C", + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, ) dev_info_h = [0] @@ -479,7 +485,7 @@ def dpnp_det(a): det = sign * det det = det.astype(det_dtype, copy=False) - singular = dpnp.array([dev_info > 0]) + singular = dev_info > 0 det = dpnp.where(singular, res_type.type(0), det) return det.reshape(shape) @@ -789,7 +795,7 @@ def dpnp_slogdet(a): sign = _calculate_determinant_sign(ipiv, diag, res_type, n) logdet = logdet.astype(logdet_dtype, copy=False) - singular = dpnp.array([dev_info > 0]) + singular = dev_info > 0 return ( dpnp.where(singular, res_type.type(0), sign).reshape(shape), dpnp.where(singular, logdet_dtype.type("-inf"), logdet).reshape(shape), diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 8c1085e64684..735cf1a8918c 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -122,8 +122,9 @@ class TestDet: "4D_array", ], ) - def test_det(self, array): - a = numpy.array(array) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_det(self, array, dtype): + a = numpy.array(array, dtype=dtype) ia = inp.array(a) result = inp.linalg.det(ia) expected = numpy.linalg.det(a) From 75fa23a62a0149dcf608633567fa19e555fa8f5a Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 11 Jan 2024 12:45:39 +0100 Subject: [PATCH 53/55] Remove det_dtype variable and use the abs val of diag for det --- dpnp/linalg/dpnp_utils_linalg.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index ba402e8ccb7a..bd44b9742534 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -455,7 +455,6 @@ def dpnp_det(a): a_sycl_queue = a.sycl_queue res_type = _common_type(a) - det_dtype = _real_type(res_type) a_shape = a.shape shape = a_shape[:-2] @@ -465,7 +464,7 @@ def dpnp_det(a): # empty batch (result is empty, too) or empty matrices det([[]]) == 1 det = dpnp.ones( shape, - dtype=det_dtype, + dtype=res_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue, ) @@ -479,12 +478,12 @@ def dpnp_det(a): lu_transposed = lu.transpose(-2, -1, *range(lu.ndim - 2)) diag = dpnp.diagonal(lu_transposed) - det = dpnp.prod(diag, axis=-1) + det = dpnp.prod(dpnp.abs(diag), axis=-1) sign = _calculate_determinant_sign(ipiv, diag, res_type, n) det = sign * det - det = det.astype(det_dtype, copy=False) + det = det.astype(res_type, copy=False) singular = dev_info > 0 det = dpnp.where(singular, res_type.type(0), det) From e9bdcd60bb47e4a81bc435f9d568e071336643bb Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 11 Jan 2024 12:46:44 +0100 Subject: [PATCH 54/55] Expand cupy tests for dpnp.linalg.det() --- .../cupy/linalg_tests/test_norms.py | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py index 41d4830e84d9..2ed49d16057a 100644 --- a/tests/third_party/cupy/linalg_tests/test_norms.py +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -4,81 +4,90 @@ import pytest 
import dpnp as cupy +from tests.helper import is_cpu_device from tests.third_party.cupy import testing +# TODO: Remove the use of fixture for all tests in this file +# when dpnp.prod() will support complex dtypes on Gen9 +@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestDet(unittest.TestCase): - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det(self, xp, dtype): a = testing.shaped_arange((2, 2), xp, dtype) + 1 return xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_3(self, xp, dtype): a = testing.shaped_arange((2, 2, 2), xp, dtype) + 1 return xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_4(self, xp, dtype): a = testing.shaped_arange((2, 2, 2, 2), xp, dtype) + 1 return xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_empty_batch(self, xp, dtype): a = xp.empty((2, 0, 3, 3), dtype=dtype) return xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_empty_matrix(self, xp, dtype): a = xp.empty((0, 0), dtype=dtype) return xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_empty_matrices(self, xp, dtype): a = xp.empty((2, 3, 0, 0), dtype=dtype) return xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") def test_det_different_last_two_dims(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2, 3, 2), xp, dtype) with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") def test_det_different_last_two_dims_empty_batch(self, dtype): for xp in (numpy, cupy): a = xp.empty((0, 3, 2), dtype=dtype) with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") def test_det_one_dim(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2,), xp, dtype) with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + @testing.for_dtypes("fdFD") def test_det_zero_dim(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((), xp, dtype) with pytest.raises(xp.linalg.LinAlgError): xp.linalg.det(a) - @testing.for_float_dtypes(no_float16=True) + # TODO: remove skipif when MKLD-16626 is resolved + # _getrf_batch does not raise an error with singular matrices. + # Skip running on cpu because dpnp uses _getrf_batch only on cpu. 
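+    # The skip applies only to this singular-matrix case; the other det tests
+    # in the class are not device-gated.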
+ @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") + @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) def test_det_singular(self, xp, dtype): a = xp.zeros((2, 3, 3), dtype=dtype) return xp.linalg.det(a) +@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestSlogdet(unittest.TestCase): @testing.for_dtypes("fdFD") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) From 549b5daee9b0873c23b600cdc413fe603af8c734 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 11 Jan 2024 16:21:17 +0100 Subject: [PATCH 55/55] Update TestDet and TestSlogdet --- tests/test_linalg.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 735cf1a8918c..cb780abd9253 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -106,6 +106,9 @@ def test_cond(arr, p): class TestDet: + # TODO: Remove the use of fixture for test_det + # when dpnp.prod() will support complex dtypes on Gen9 + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize( "array", [ @@ -690,6 +693,9 @@ def test_solve_errors(self): class TestSlogdet: + # TODO: Remove the use of fixture for test_slogdet_2d and test_slogdet_3d + # when dpnp.prod() will support complex dtypes on Gen9 + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) def test_slogdet_2d(self, dtype): a_np = numpy.array([[1, 2], [3, 4]], dtype=dtype) @@ -701,6 +707,7 @@ def test_slogdet_2d(self, dtype): assert_allclose(sign_expected, sign_result) assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) def test_slogdet_3d(self, dtype): a_np = numpy.array(