From 4e2640398c124b06c30d261b0b6c3717c25edd3a Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Tue, 22 Oct 2024 17:53:10 +0200 Subject: [PATCH 1/5] Implementation of correlate --- .../extensions/statistics/CMakeLists.txt | 5 +- .../extensions/statistics/dispatch_table.hpp | 98 +++ .../statistics/histogram_common.cpp | 140 ++-- .../statistics/sliding_dot_product1d.cpp | 170 +++++ .../statistics/sliding_dot_product1d.hpp | 66 ++ .../statistics/sliding_window1d.cpp | 96 +++ .../statistics/sliding_window1d.hpp | 660 ++++++++++++++++++ .../extensions/statistics/statistics_py.cpp | 2 + .../statistics/validation_utils.cpp | 190 +++++ .../statistics/validation_utils.hpp | 73 ++ dpnp/dpnp_iface_statistics.py | 194 ++++- dpnp/dpnp_utils/dpnp_utils_common.py | 81 +++ dpnp/tests/test_statistics.py | 73 ++ dpnp/tests/test_sycl_queue.py | 1 + dpnp/tests/test_usm_type.py | 1 + .../cupy/statistics_tests/test_correlation.py | 2 - tests_external/skipped_tests_numpy.tbl | 1 - 17 files changed, 1722 insertions(+), 131 deletions(-) create mode 100644 dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp create mode 100644 dpnp/backend/extensions/statistics/sliding_dot_product1d.hpp create mode 100644 dpnp/backend/extensions/statistics/sliding_window1d.cpp create mode 100644 dpnp/backend/extensions/statistics/sliding_window1d.hpp create mode 100644 dpnp/backend/extensions/statistics/validation_utils.cpp create mode 100644 dpnp/backend/extensions/statistics/validation_utils.hpp create mode 100644 dpnp/dpnp_utils/dpnp_utils_common.py diff --git a/dpnp/backend/extensions/statistics/CMakeLists.txt b/dpnp/backend/extensions/statistics/CMakeLists.txt index 26dda2800086..dcec77ae603a 100644 --- a/dpnp/backend/extensions/statistics/CMakeLists.txt +++ b/dpnp/backend/extensions/statistics/CMakeLists.txt @@ -26,11 +26,14 @@ set(python_module_name _statistics_impl) set(_module_src - ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/bincount.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/histogram.cpp ${CMAKE_CURRENT_SOURCE_DIR}/histogram_common.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/sliding_dot_product1d.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/sliding_window1d.cpp ${CMAKE_CURRENT_SOURCE_DIR}/statistics_py.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/validation_utils.cpp ) pybind11_add_module(${python_module_name} MODULE ${_module_src}) diff --git a/dpnp/backend/extensions/statistics/dispatch_table.hpp b/dpnp/backend/extensions/statistics/dispatch_table.hpp index ce11de891318..42c170c53617 100644 --- a/dpnp/backend/extensions/statistics/dispatch_table.hpp +++ b/dpnp/backend/extensions/statistics/dispatch_table.hpp @@ -97,6 +97,32 @@ using DTypePair = std::pair; using SupportedDTypeList = std::vector; using SupportedDTypeList2 = std::vector; +template + typename Func> +struct TableBuilder +{ + template + struct impl + { + static constexpr bool is_defined = one_of_v; + + _FnT get() + { + if constexpr (is_defined) { + return Func::impl; + } + else { + return nullptr; + } + } + }; + + using type = + dpctl_td_ns::DispatchVectorBuilder; +}; + template @@ -124,6 +150,78 @@ struct TableBuilder2 dpctl_td_ns::DispatchTableBuilder; }; +template +class DispatchTable +{ +public: + DispatchTable(std::string name) : name(name) {} + + template typename Func> + void populate_dispatch_table() + { + using TBulder = typename TableBuilder::type; + TBulder builder; + + builder.populate_dispatch_vector(table); + populate_supported_types(); + } + + FnT get_unsafe(int _typenum) const + { + auto array_types = 
dpctl_td_ns::usm_ndarray_types(); + const int type_id = array_types.typenum_to_lookup_id(_typenum); + + return table[type_id]; + } + + FnT get(int _typenum) const + { + auto fn = get_unsafe(_typenum); + + if (fn == nullptr) { + auto array_types = dpctl_td_ns::usm_ndarray_types(); + const int _type_id = array_types.typenum_to_lookup_id(_typenum); + + py::dtype _dtype = dtype_from_typenum(_type_id); + auto _type_pos = std::find(supported_types.begin(), + supported_types.end(), _dtype); + if (_type_pos == supported_types.end()) { + py::str types = py::str(py::cast(supported_types)); + py::str dtype = py::str(_dtype); + + py::str err_msg = + py::str("'" + name + "' has unsupported type '") + dtype + + py::str("'." + " Supported types are: ") + + types; + + throw py::value_error(static_cast(err_msg)); + } + } + + return fn; + } + + const SupportedDTypeList &get_all_supported_types() const + { + return supported_types; + } + +private: + void populate_supported_types() + { + for (int i = 0; i < dpctl_td_ns::num_types; ++i) { + if (table[i] != nullptr) { + supported_types.emplace_back(dtype_from_typenum(i)); + } + } + } + + std::string name; + SupportedDTypeList supported_types; + Table table; +}; + template class DispatchTable2 { diff --git a/dpnp/backend/extensions/statistics/histogram_common.cpp b/dpnp/backend/extensions/statistics/histogram_common.cpp index e2445b78bb3f..af47fe633674 100644 --- a/dpnp/backend/extensions/statistics/histogram_common.cpp +++ b/dpnp/backend/extensions/statistics/histogram_common.cpp @@ -38,6 +38,8 @@ #include "histogram_common.hpp" +#include "validation_utils.hpp" + namespace dpctl_td_ns = dpctl::tensor::type_dispatch; using dpctl::tensor::usm_ndarray; using dpctl_td_ns::typenum_t; @@ -46,6 +48,15 @@ namespace statistics { using common::CeilDiv; +using validation::array_names; +using validation::array_ptr; + +using validation::check_max_dims; +using validation::check_num_dims; +using validation::check_size_at_least; +using validation::common_checks; +using validation::name_of; + namespace histogram { @@ -55,11 +66,9 @@ void validate(const usm_ndarray &sample, const usm_ndarray &histogram) { auto exec_q = sample.get_queue(); - using array_ptr = const usm_ndarray *; std::vector arrays{&sample, &histogram}; - std::unordered_map names = { - {arrays[0], "sample"}, {arrays[1], "histogram"}}; + array_names names = {{arrays[0], "sample"}, {arrays[1], "histogram"}}; array_ptr bins_ptr = nullptr; @@ -77,117 +86,48 @@ void validate(const usm_ndarray &sample, names.insert({weights_ptr, "weights"}); } - auto get_name = [&](const array_ptr &arr) { - auto name_it = names.find(arr); - assert(name_it != names.end()); - - return "'" + name_it->second + "'"; - }; - - dpctl::tensor::validation::CheckWritable::throw_if_not_writable(histogram); - - auto unequal_queue = - std::find_if(arrays.cbegin(), arrays.cend(), [&](const array_ptr &arr) { - return arr->get_queue() != exec_q; - }); - - if (unequal_queue != arrays.cend()) { - throw py::value_error( - get_name(*unequal_queue) + - " parameter has incompatible queue with parameter " + - get_name(&sample)); - } - - auto non_contig_array = - std::find_if(arrays.cbegin(), arrays.cend(), [&](const array_ptr &arr) { - return !arr->is_c_contiguous(); - }); + common_checks({&sample, bins.has_value() ? &bins.value() : nullptr, + weights.has_value() ? 
&weights.value() : nullptr}, + {&histogram}, names); - if (non_contig_array != arrays.cend()) { - throw py::value_error(get_name(*non_contig_array) + - " parameter is not c-contiguos"); - } + check_size_at_least(bins_ptr, 2, names); - auto check_overlaping = [&](const array_ptr &first, - const array_ptr &second) { - if (first == nullptr || second == nullptr) { - return; - } - - const auto &overlap = dpctl::tensor::overlap::MemoryOverlap(); - - if (overlap(*first, *second)) { - throw py::value_error(get_name(first) + - " has overlapping memory segments with " + - get_name(second)); - } - }; - - check_overlaping(&sample, &histogram); - check_overlaping(bins_ptr, &histogram); - check_overlaping(weights_ptr, &histogram); - - if (bins_ptr && bins_ptr->get_size() < 2) { - throw py::value_error(get_name(bins_ptr) + - " parameter must have at least 2 elements"); - } - - if (histogram.get_size() < 1) { - throw py::value_error(get_name(&histogram) + - " parameter must have at least 1 element"); - } - - if (histogram.get_ndim() != 1) { - throw py::value_error(get_name(&histogram) + - " parameter must be 1d. Actual " + - std::to_string(histogram.get_ndim()) + "d"); - } + check_size_at_least(&histogram, 1, names); + check_num_dims(&histogram, 1, names); if (weights_ptr) { - if (weights_ptr->get_ndim() != 1) { - throw py::value_error( - get_name(weights_ptr) + " parameter must be 1d. Actual " + - std::to_string(weights_ptr->get_ndim()) + "d"); - } + check_num_dims(weights_ptr, 1, names); auto sample_size = sample.get_size(); auto weights_size = weights_ptr->get_size(); if (sample.get_size() != weights_ptr->get_size()) { - throw py::value_error( - get_name(&sample) + " size (" + std::to_string(sample_size) + - ") and " + get_name(weights_ptr) + " size (" + - std::to_string(weights_size) + ")" + " must match"); + throw py::value_error(name_of(&sample, names) + " size (" + + std::to_string(sample_size) + ") and " + + name_of(weights_ptr, names) + " size (" + + std::to_string(weights_size) + ")" + + " must match"); } } - if (sample.get_ndim() > 2) { - throw py::value_error( - get_name(&sample) + - " parameter must have no more than 2 dimensions. Actual " + - std::to_string(sample.get_ndim()) + "d"); - } + check_max_dims(&sample, 2, names); if (sample.get_ndim() == 1) { - if (bins_ptr != nullptr && bins_ptr->get_ndim() != 1) { - throw py::value_error(get_name(&sample) + " parameter is 1d, but " + - get_name(bins_ptr) + " is " + - std::to_string(bins_ptr->get_ndim()) + "d"); - } + check_num_dims(bins_ptr, 1, names); } else if (sample.get_ndim() == 2) { auto sample_count = sample.get_shape(0); auto expected_dims = sample.get_shape(1); if (bins_ptr != nullptr && bins_ptr->get_ndim() != expected_dims) { - throw py::value_error(get_name(&sample) + " parameter has shape {" + - std::to_string(sample_count) + "x" + - std::to_string(expected_dims) + "}" + - ", so " + get_name(bins_ptr) + - " parameter expected to be " + - std::to_string(expected_dims) + - "d. " - "Actual " + - std::to_string(bins->get_ndim()) + "d"); + throw py::value_error( + name_of(&sample, names) + " parameter has shape {" + + std::to_string(sample_count) + "x" + + std::to_string(expected_dims) + "}" + ", so " + + name_of(bins_ptr, names) + " parameter expected to be " + + std::to_string(expected_dims) + + "d. 
" + "Actual " + + std::to_string(bins->get_ndim()) + "d"); } } @@ -199,9 +139,9 @@ void validate(const usm_ndarray &sample, if (histogram.get_size() != expected_hist_size) { throw py::value_error( - get_name(&histogram) + " and " + get_name(bins_ptr) + - " shape mismatch. " + get_name(&histogram) + - " expected to have size = " + + name_of(&histogram, names) + " and " + + name_of(bins_ptr, names) + " shape mismatch. " + + name_of(&histogram, names) + " expected to have size = " + std::to_string(expected_hist_size) + ". Actual " + std::to_string(histogram.get_size())); } @@ -209,7 +149,7 @@ void validate(const usm_ndarray &sample, int64_t max_hist_size = std::numeric_limits::max() - 1; if (histogram.get_size() > max_hist_size) { - throw py::value_error(get_name(&histogram) + + throw py::value_error(name_of(&histogram, names) + " parameter size expected to be less than " + std::to_string(max_hist_size) + ". Actual " + std::to_string(histogram.get_size())); @@ -225,7 +165,7 @@ void validate(const usm_ndarray &sample, if (!_64bit_atomics) { auto device_name = device.get_info(); throw py::value_error( - get_name(&histogram) + + name_of(&histogram, names) + " parameter has 64-bit type, but 64-bit atomics " + " are not supported for " + device_name); } diff --git a/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp b/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp new file mode 100644 index 000000000000..ddc501bc13be --- /dev/null +++ b/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp @@ -0,0 +1,170 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +// dpctl tensor headers +#include "dpctl4pybind11.hpp" +#include "utils/type_dispatch.hpp" + +#include "common.hpp" +#include "sliding_dot_product1d.hpp" +#include "sliding_window1d.hpp" + +#include + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +using dpctl::tensor::usm_ndarray; + +using namespace statistics::sliding_window1d; +using namespace statistics::common; + +namespace +{ + +template +struct SlidingDotProductF +{ + static sycl::event impl(sycl::queue &exec_q, + const void *v_ain, + const void *v_vin, + void *v_out, + const size_t a_size, + const size_t v_size, + const size_t l_pad, + const size_t r_pad, + const std::vector &depends) + { + const T *ain = static_cast(v_ain); + const T *vin = static_cast(v_vin); + T *out = static_cast(v_out); + + auto device = exec_q.get_device(); + const auto local_size = get_max_local_size(device); + const auto work_size = l_pad + r_pad + a_size - v_size + 1; + + constexpr int32_t WorkPI = 4; + + return exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + auto input = make_padded_span(ain, a_size, l_pad); + auto kernel = make_span(vin, v_size); + auto result = make_span(out, work_size); + + if (v_size > 8) { + const auto nd_range = + make_ndrange(work_size, local_size, WorkPI); + submit_sliding_window1d( + input, kernel, std::multiplies<>{}, std::plus<>{}, result, + nd_range, cgh); + } + else { + const auto nd_range = + make_ndrange(work_size, local_size, WorkPI); + submit_sliding_window1d_small_kernel( + input, kernel, std::multiplies<>{}, std::plus<>{}, result, + nd_range, cgh); + } + }); + } +}; + +using SupportedTypes = std::tuple, + std::complex>; +} // namespace + +SlidingDotProduct1d::SlidingDotProduct1d() : dispatch_table("a") +{ + dispatch_table + .populate_dispatch_table(); +} + +std::tuple + SlidingDotProduct1d::call(const dpctl::tensor::usm_ndarray &a, + const dpctl::tensor::usm_ndarray &v, + dpctl::tensor::usm_ndarray &out, + const size_t l_pad, + const size_t r_pad, + const std::vector &depends) +{ + validate(a, v, out, l_pad, r_pad); + + const int a_typenum = a.get_typenum(); + + auto corr_func = dispatch_table.get(a_typenum); + + auto exec_q = a.get_queue(); + + auto ev = corr_func(exec_q, a.get_data(), v.get_data(), out.get_data(), + a.get_shape(0), v.get_shape(0), l_pad, r_pad, depends); + + sycl::event args_ev; + args_ev = dpctl::utils::keep_args_alive(exec_q, {a, v, out}, {ev}); + + return {args_ev, ev}; +} + +std::unique_ptr sdp; + +void statistics::sliding_window1d::populate_sliding_dot_product1d(py::module_ m) +{ + using namespace std::placeholders; + + sdp.reset(new SlidingDotProduct1d()); + + auto sdp_func = [sdpp = + sdp.get()](const dpctl::tensor::usm_ndarray &a, + const dpctl::tensor::usm_ndarray &v, + dpctl::tensor::usm_ndarray &out, + const size_t l_pad, const size_t r_pad, + const std::vector &depends) { + return sdpp->call(a, v, out, l_pad, r_pad, depends); + }; + + m.def("sliding_dot_product1d", sdp_func, "1d sliding dot product.", + py::arg("a"), py::arg("v"), py::arg("out"), py::arg("l_pad"), + py::arg("r_pad"), py::arg("depends") = py::list()); + + auto sdp_dtypes = [sdpp = sdp.get()]() { + return sdpp->dispatch_table.get_all_supported_types(); + }; + + m.def("sliding_dot_product1d_dtypes", sdp_dtypes, + "Get the supported data types for sliding_dot_product1d_dtypes."); +} diff --git 
a/dpnp/backend/extensions/statistics/sliding_dot_product1d.hpp b/dpnp/backend/extensions/statistics/sliding_dot_product1d.hpp new file mode 100644 index 000000000000..4081dc94a3ee --- /dev/null +++ b/dpnp/backend/extensions/statistics/sliding_dot_product1d.hpp @@ -0,0 +1,66 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include "dispatch_table.hpp" +#include "utils/type_dispatch.hpp" +#include +#include + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; + +namespace statistics +{ +namespace sliding_window1d +{ +struct SlidingDotProduct1d +{ + using FnT = sycl::event (*)(sycl::queue &, + const void *, + const void *, + void *, + const size_t, + const size_t, + const size_t, + const size_t, + const std::vector &); + + common::DispatchTable dispatch_table; + + SlidingDotProduct1d(); + + std::tuple + call(const dpctl::tensor::usm_ndarray &a, + const dpctl::tensor::usm_ndarray &v, + dpctl::tensor::usm_ndarray &output, + const size_t l_pad, + const size_t r_pad, + const std::vector &depends); +}; + +void populate_sliding_dot_product1d(py::module_ m); +} // namespace sliding_window1d +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/sliding_window1d.cpp b/dpnp/backend/extensions/statistics/sliding_window1d.cpp new file mode 100644 index 000000000000..bf7ec2424c8e --- /dev/null +++ b/dpnp/backend/extensions/statistics/sliding_window1d.cpp @@ -0,0 +1,96 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include +#include +#include +#include + +#include "dpctl4pybind11.hpp" +#include "utils/type_dispatch.hpp" +#include + +#include "sliding_window1d.hpp" +#include "validation_utils.hpp" + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +using dpctl::tensor::usm_ndarray; +using dpctl_td_ns::typenum_t; + +namespace statistics +{ +using validation::array_names; +using validation::array_ptr; +using validation::check_num_dims; +using validation::common_checks; +using validation::name_of; + +namespace sliding_window1d +{ + +void validate(const usm_ndarray &a, + const usm_ndarray &v, + const usm_ndarray &out, + const size_t l_pad, + const size_t r_pad) +{ + std::vector inputs = {&a, &v}; + std::vector outputs = {&out}; + + array_names names; + names[&a] = "a"; + names[&v] = "v"; + names[&out] = "out"; + + common_checks(inputs, outputs, names); + + check_num_dims(&a, 1, names); + check_num_dims(&v, 1, names); + check_num_dims(&out, 1, names); + + py::ssize_t padded_a_size = l_pad + r_pad + a.get_size(); + + if (v.get_size() > padded_a_size) { + throw py::value_error(name_of(&v, names) + " size (" + + std::to_string(v.get_size()) + + ") must be less than or equal to a size (" + + std::to_string(a.get_size()) + ") + l_pad (" + + std::to_string(l_pad) + ") + r_pad (" + + std::to_string(r_pad) + ")"); + } + + auto expected_output_size = padded_a_size - v.get_size() + 1; + if (out.get_size() != expected_output_size) { + throw py::value_error( + name_of(&out, names) + " size (" + std::to_string(out.get_size()) + + ") must be equal to " + name_of(&a, names) + + " size + l_pad + r_pad " + name_of(&v, names) + " size + 1 (" + + std::to_string(expected_output_size) + ")"); + } +} + +} // namespace sliding_window1d +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/sliding_window1d.hpp b/dpnp/backend/extensions/statistics/sliding_window1d.hpp new file mode 100644 index 000000000000..c60c9a52adb9 --- /dev/null +++ b/dpnp/backend/extensions/statistics/sliding_window1d.hpp @@ -0,0 +1,660 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include "utils/math_utils.hpp" +#include +#include +#include +#include + +#include + +#include "common.hpp" + +namespace dpctl +{ +namespace tensor +{ +class usm_ndarray; +} +} // namespace dpctl + +using dpctl::tensor::usm_ndarray; + +namespace statistics +{ +using common::Align; +using common::CeilDiv; + +namespace sliding_window1d +{ + +template +class _RegistryDataStorage +{ +public: + using ncT = typename std::remove_const_t; + using SizeT = decltype(Size); + static constexpr SizeT _size = Size; + + _RegistryDataStorage(const sycl::nd_item<1> &item) + : sbgroup(item.get_sub_group()) + { + } + + template + T &operator[](const yT &idx) + { + static_assert(std::is_integral_v, + "idx must be of an integral type"); + return data[idx]; + } + + template + const T &operator[](const yT &idx) const + { + static_assert(std::is_integral_v, + "idx must be of an integral type"); + return data[idx]; + } + + T &value() + { + static_assert(Size == 1, + "Size is not equal to 1. Use value(idx) instead"); + return data[0]; + } + + const T &value() const + { + static_assert(Size == 1, + "Size is not equal to 1. 
Use value(idx) instead"); + return data[0]; + } + + template + T broadcast(const yT &y, const xT &x) const + { + static_assert(std::is_integral_v>, + "y must be of an integral type"); + static_assert(std::is_integral_v>, + "x must be of an integral type"); + + return sycl::select_from_group(sbgroup, data[y], x); + } + + template + T broadcast(const iT &idx) const + { + if constexpr (Size == 1) { + return broadcast(0, idx); + } + else { + return broadcast(idx / size_x(), idx % size_x()); + } + } + + template + T shift_left(const yT &y, const xT &x) const + { + static_assert(std::is_integral_v, "y must be of an integral type"); + static_assert(std::is_integral_v, "x must be of an integral type"); + + return sycl::shift_group_left(sbgroup, data[y], x); + } + + template + T shift_right(const yT &y, const xT &x) const + { + static_assert(std::is_integral_v, "y must be of an integral type"); + static_assert(std::is_integral_v, "x must be of an integral type"); + + return sycl::shift_group_right(sbgroup, data[y], x); + } + + constexpr SizeT size_y() const + { + return _size; + } + + SizeT size_x() const + { + return sbgroup.get_max_local_range()[0]; + } + + SizeT total_size() const + { + return size_x() * size_y(); + } + + ncT *ptr() + { + return data; + } + + SizeT x() const + { + return sbgroup.get_local_linear_id(); + } + +protected: + const sycl::sub_group sbgroup; + ncT data[Size]; +}; + +template +struct RegistryData : public _RegistryDataStorage +{ + using SizeT = typename _RegistryDataStorage::SizeT; + + using _RegistryDataStorage::_RegistryDataStorage; + + template >> + void fill_lane(const LaneIdT &lane_id, const T &value, Condition &&mask) + { + static_assert(std::is_integral_v, + "lane_id must be of an integral type"); + if (mask(this->x())) { + this->data[lane_id] = value; + } + } + + template + void fill_lane(const LaneIdT &lane_id, const T &value, const bool &mask) + { + fill_lane(lane_id, value, [mask](auto &&) { return mask; }); + } + + template + void fill_lane(const LaneIdT &lane_id, const T &value) + { + fill_lane(lane_id, value, true); + } + + template >> + void fill(const T &value, Condition &&mask) + { + for (SizeT i = 0; i < Size; ++i) { + fill_lane(i, value, mask(i, this->x())); + } + } + + void fill(const T &value) + { + fill(value, [](auto &&, auto &&) { return true; }); + } + + template >> + T *load_lane(const LaneIdT &lane_id, + const T *const data, + Condition &&mask, + const T &default_v) + { + static_assert(std::is_integral_v, + "lane_id must be of an integral type"); + this->data[lane_id] = mask(data) ? 
data[0] : default_v; + + return data + this->size_x(); + } + + template + T *load_lane(const LaneIdT &laned_id, + const T *const data, + const bool &mask, + const T &default_v) + { + return load_lane( + laned_id, data, [mask](auto &&) { return mask; }, default_v); + } + + template + T *load_lane(const LaneIdT &laned_id, const T *const data) + { + constexpr T default_v = 0; + return load_lane(laned_id, data, true, default_v); + } + + template >> + T *load(const T *const data, + const yStrideT &y_stride, + Condition &&mask, + const T &default_v) + { + auto *it = data; + for (SizeT i = 0; i < Size; ++i) { + load_lane(i, it, mask, default_v); + it += y_stride; + } + + return it; + } + + template + T *load(const T *const data, + const yStrideT &y_stride, + const bool &mask, + const T &default_v) + { + return load( + data, y_stride, [mask](auto &&) { return mask; }, default_v); + } + + template >> + T *load(const T *const data, Condition &&mask, const T &default_v) + { + return load(data, this->size_x(), mask, default_v); + } + + T *load(const T *const data, const bool &mask, const T &default_v) + { + return load( + data, [mask](auto &&) { return mask; }, default_v); + } + + T *load(const T *const data) + { + constexpr T default_v = 0; + return load(data, true, default_v); + } + + template >> + T *store_lane(const LaneIdT &lane_id, T *const data, Condition &&mask) + { + static_assert(std::is_integral_v, + "lane_id must be of an integral type"); + + if (mask(data)) { + data[0] = this->data[lane_id]; + } + + return data + this->size_x(); + } + + template + T *store_lane(const LaneIdT &lane_id, T *const data, const bool &mask) + { + return store_lane(lane_id, data, [mask](auto &&) { return mask; }); + } + + template + T *store_lane(const LaneIdT &lane_id, T *const data) + { + return store_lane(lane_id, data, true); + } + + template >> + T *store(T *const data, const yStrideT &y_stride, Condition &&condition) + { + auto *it = data; + for (SizeT i = 0; i < Size; ++i) { + store_lane(i, it, condition); + it += y_stride; + } + + return it; + } + + template + T *store(T *const data, const yStrideT &y_stride, const bool &mask) + { + return store(data, y_stride, [mask](auto &&) { return mask; }); + } + + template >> + T *store(T *const data, Condition &&condition) + { + return store(data, this->size_x(), condition); + } + + T *store(T *const data, const bool &mask) + { + return store(data, [mask](auto &&) { return mask; }); + } + + T *store(T *const data) + { + return store(data, true); + } +}; + +template +struct RegistryWindow : public RegistryData +{ + using SizeT = typename RegistryData::SizeT; + + using RegistryData::RegistryData; + + template + void advance_left(const shT &shift, const T &fill_value) + { + static_assert(std::is_integral_v, + "shift must be of an integral type"); + + uint32_t shift_r = this->size_x() - shift; + for (SizeT i = 0; i < Size; ++i) { + this->data[i] = this->shift_left(i, shift); + auto border = + i < Size - 1 ? 
this->shift_right(i + 1, shift_r) : fill_value; + if (this->x() >= shift_r) { + this->data[i] = border; + } + } + } + + void advance_left(const T &fill_value) + { + advance_left(1, fill_value); + } + + void advance_left() + { + constexpr T fill_value = 0; + advance_left(fill_value); + } +}; + +template +class Span +{ +public: + using value_type = T; + using size_type = SizeT; + + Span(T *const data, const SizeT size) : data_(data), size_(size) {} + + T *begin() const + { + return data(); + } + + T *end() const + { + return data() + size(); + } + + SizeT size() const + { + return size_; + } + + T *data() const + { + return data_; + } + +protected: + T *const data_; + const SizeT size_; +}; + +template +Span make_span(T *const data, const SizeT size) +{ + return Span(data, size); +} + +template +class PaddedSpan : public Span +{ +public: + using value_type = T; + using size_type = SizeT; + + PaddedSpan(T *const data, const SizeT size, const SizeT pad) + : Span(data, size), pad_(pad) + { + } + + T *padded_begin() const + { + return this->begin() - pad(); + } + + SizeT pad() const + { + return pad_; + } + +protected: + const SizeT pad_; +}; + +template +PaddedSpan + make_padded_span(T *const data, const SizeT size, const SizeT offset) +{ + return PaddedSpan(data, size, offset); +} + +template +void process_block(Results &results, + uint32_t r_size, + AData &a_data, + VData &v_data, + uint32_t block_size, + Op op, + Red red) +{ + for (uint32_t i = 0; i < block_size; ++i) { + auto v_val = v_data.broadcast(i); + for (uint32_t r = 0; r < r_size; ++r) { + results[r] = red(results[r], op(a_data[r], v_val)); + } + a_data.advance_left(); + } +} + +template +SizeT get_global_linear_id(const uint32_t wpi, const sycl::nd_item<1> &item) +{ + auto sbgroup = item.get_sub_group(); + const auto sg_loc_id = sbgroup.get_local_linear_id(); + + const SizeT sg_base_id = wpi * (item.get_global_linear_id() - sg_loc_id); + const SizeT id = sg_base_id + sg_loc_id; + + return id; +} + +template +uint32_t get_results_num(const uint32_t wpi, + const SizeT size, + const SizeT global_id, + const sycl::nd_item<1> &item) +{ + auto sbgroup = item.get_sub_group(); + + const auto sbg_size = sbgroup.get_max_local_range()[0]; + const auto size_ = sycl::sub_sat(size, global_id); + return std::min(SizeT(wpi), CeilDiv(size_, sbg_size)); +} + +template +class sliding_window1d_kernel; + +template +void submit_sliding_window1d(const PaddedSpan &a, + const Span &v, + const Op &op, + const Red &red, + Span &out, + sycl::nd_range<1> nd_range, + sycl::handler &cgh) +{ + cgh.parallel_for>( + nd_range, [=](sycl::nd_item<1> item) { + auto glid = get_global_linear_id(WorkPI, item); + + auto results = RegistryData(item); + results.fill(0); + + auto results_num = get_results_num(WorkPI, out.size(), glid, item); + + const auto *a_begin = a.begin(); + const auto *a_end = a.end(); + + auto sbgroup = item.get_sub_group(); + + const auto chunks_count = + CeilDiv(v.size(), sbgroup.get_max_local_range()[0]); + + const auto *a_ptr = &a.padded_begin()[glid]; + + auto _a_load_cond = [a_begin, a_end](auto &&ptr) { + return ptr >= a_begin && ptr < a_end; + }; + + auto a_data = RegistryWindow(item); + a_ptr = a_data.load(a_ptr, _a_load_cond, 0); + + const auto *v_ptr = &v.begin()[sbgroup.get_local_linear_id()]; + auto v_size = v.size(); + + for (uint32_t b = 0; b < chunks_count; ++b) { + auto v_data = RegistryData(item); + v_ptr = v_data.load(v_ptr, v_data.x() < v_size, 0); + + uint32_t chunk_size_ = + std::min(v_size, SizeT(v_data.total_size())); + 
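// process_block broadcasts each cached element of `v` across the sub-group, combines it with the register-resident window of `a` via `op`, folds the result into `results` with `red`, and slides the window left by one element per step.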
process_block(results, results_num, a_data, v_data, chunk_size_, + op, red); + + if (b != chunks_count - 1) { + a_ptr = a_data.load_lane(a_data.size_y() - 1, a_ptr, + _a_load_cond, 0); + v_size -= v_data.total_size(); + } + } + + auto *const out_ptr = out.begin(); + auto *const out_end = out.end(); + results.store(&out_ptr[glid], + [out_end](auto &&ptr) { return ptr < out_end; }); + }); +} + +template +class sliding_window1d_small_kernel; + +template +void submit_sliding_window1d_small_kernel(const PaddedSpan &a, + const Span &v, + const Op &op, + const Red &red, + Span &out, + sycl::nd_range<1> nd_range, + sycl::handler &cgh) +{ + cgh.parallel_for>( + nd_range, [=](sycl::nd_item<1> item) { + auto glid = get_global_linear_id(WorkPI, item); + + auto results = RegistryData(item); + results.fill(0); + + auto sbgroup = item.get_sub_group(); + auto sg_size = sbgroup.get_max_local_range()[0]; + + const uint32_t to_read = WorkPI * sg_size + v.size(); + const auto *a_begin = a.begin(); + + const auto *a_ptr = &a.padded_begin()[glid]; + const auto *a_end = std::min(a_ptr + to_read, a.end()); + + auto _a_load_cond = [a_begin, a_end](auto &&ptr) { + return ptr >= a_begin && ptr < a_end; + }; + + auto a_data = RegistryWindow(item); + a_data.load(a_ptr, _a_load_cond, 0); + + const auto *v_ptr = &v.begin()[sbgroup.get_local_linear_id()]; + auto v_size = v.size(); + + auto v_data = RegistryData(item); + v_ptr = v_data.load(v_ptr, v_data.x() < v_size, 0); + + auto results_num = get_results_num(WorkPI, out.size(), glid, item); + + process_block(results, results_num, a_data, v_data, v_size, op, + red); + + auto *const out_ptr = out.begin(); + auto *const out_end = out.end(); + results.store(&out_ptr[glid], + [out_end](auto &&ptr) { return ptr < out_end; }); + }); +} + +void validate(const usm_ndarray &a, + const usm_ndarray &v, + const usm_ndarray &out, + const size_t l_pad, + const size_t r_pad); +} // namespace sliding_window1d +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/statistics_py.cpp b/dpnp/backend/extensions/statistics/statistics_py.cpp index 2f3bf6a901c1..cfb3f66e08e9 100644 --- a/dpnp/backend/extensions/statistics/statistics_py.cpp +++ b/dpnp/backend/extensions/statistics/statistics_py.cpp @@ -32,9 +32,11 @@ #include "bincount.hpp" #include "histogram.hpp" +#include "sliding_dot_product1d.hpp" PYBIND11_MODULE(_statistics_impl, m) { statistics::histogram::populate_bincount(m); statistics::histogram::populate_histogram(m); + statistics::sliding_window1d::populate_sliding_dot_product1d(m); } diff --git a/dpnp/backend/extensions/statistics/validation_utils.cpp b/dpnp/backend/extensions/statistics/validation_utils.cpp new file mode 100644 index 000000000000..195d5e4df253 --- /dev/null +++ b/dpnp/backend/extensions/statistics/validation_utils.cpp @@ -0,0 +1,190 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include "validation_utils.hpp" +#include "utils/memory_overlap.hpp" + +using statistics::validation::array_names; +using statistics::validation::array_ptr; + +namespace +{ + +sycl::queue get_queue(const std::vector &inputs, + const std::vector &outputs) +{ + auto it = std::find_if(inputs.cbegin(), inputs.cend(), + [](const array_ptr &arr) { return arr != nullptr; }); + + if (it != inputs.cend()) { + return (*it)->get_queue(); + } + + it = std::find_if(outputs.cbegin(), outputs.cend(), + [](const array_ptr &arr) { return arr != nullptr; }); + + if (it != outputs.cend()) { + return (*it)->get_queue(); + } + + throw py::value_error("No input or output arrays found"); +} +} // namespace + +namespace statistics +{ +namespace validation +{ +std::string name_of(const array_ptr &arr, const array_names &names) +{ + auto name_it = names.find(arr); + assert(name_it != names.end()); + + if (name_it != names.end()) + return "'" + name_it->second + "'"; + + return "'unknown'"; +} + +void check_writable(const std::vector &arrays, + const array_names &names) +{ + for (const auto &arr : arrays) { + if (arr != nullptr && !arr->is_writable()) { + throw py::value_error(name_of(arr, names) + + " parameter is not writable"); + } + } +} + +void check_c_contig(const std::vector &arrays, + const array_names &names) +{ + for (const auto &arr : arrays) { + if (arr != nullptr && !arr->is_c_contiguous()) { + throw py::value_error(name_of(arr, names) + + " parameter is not c-contiguos"); + } + } +} + +void check_queue(const std::vector &arrays, + const array_names &names, + const sycl::queue &exec_q) +{ + auto unequal_queue = + std::find_if(arrays.cbegin(), arrays.cend(), [&](const array_ptr &arr) { + return arr != nullptr && arr->get_queue() != exec_q; + }); + + if (unequal_queue != arrays.cend()) { + throw py::value_error( + name_of(*unequal_queue, names) + + " parameter has incompatible queue with other parameters"); + } +} + +void check_no_overlap(const array_ptr &input, + const array_ptr &output, + const array_names &names) +{ + if (input == nullptr || output == nullptr) { + return; + } + + const auto &overlap = dpctl::tensor::overlap::MemoryOverlap(); + + if (overlap(*input, *output)) { + throw py::value_error(name_of(input, names) + + " has overlapping memory segments with " + + name_of(output, names)); + } +} + +void check_no_overlap(const std::vector &inputs, + const std::vector &outputs, + const array_names &names) +{ + for (const auto &input : inputs) { + for (const auto &output : outputs) { + check_no_overlap(input, output, names); + } + } +} + +void check_num_dims(const array_ptr &arr, + const size_t ndim, + const array_names &names) 
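// A null array pointer means the argument was not provided, so the check is skipped for it.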
+{ + if (arr != nullptr && arr->get_ndim() != ndim) { + throw py::value_error("Array " + name_of(arr, names) + " must be " + + std::to_string(ndim) + "D, but got " + + std::to_string(arr->get_ndim()) + "D."); + } +} + +void check_max_dims(const array_ptr &arr, + const size_t max_ndim, + const array_names &names) +{ + if (arr != nullptr && arr->get_ndim() > max_ndim) { + throw py::value_error( + "Array " + name_of(arr, names) + " must have no more than " + + std::to_string(max_ndim) + " dimensions, but got " + + std::to_string(arr->get_ndim()) + " dimensions."); + } +} + +void check_size_at_least(const array_ptr &arr, + const size_t size, + const array_names &names) +{ + if (arr != nullptr && arr->get_size() < size) { + throw py::value_error("Array " + name_of(arr, names) + + " must have at least " + std::to_string(size) + + " elements, but got " + + std::to_string(arr->get_size()) + " elements."); + } +} + +void common_checks(const std::vector &inputs, + const std::vector &outputs, + const array_names &names) +{ + check_writable(outputs, names); + + check_c_contig(inputs, names); + check_c_contig(outputs, names); + + auto exec_q = get_queue(inputs, outputs); + + check_queue(inputs, names, exec_q); + check_queue(outputs, names, exec_q); + + check_no_overlap(inputs, outputs, names); +} + +} // namespace validation +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/validation_utils.hpp b/dpnp/backend/extensions/statistics/validation_utils.hpp new file mode 100644 index 000000000000..845cbc9d9928 --- /dev/null +++ b/dpnp/backend/extensions/statistics/validation_utils.hpp @@ -0,0 +1,73 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include +#include +#include + +#include "dpctl4pybind11.hpp" + +namespace statistics +{ +namespace validation +{ +using array_ptr = const dpctl::tensor::usm_ndarray *; +using array_names = std::unordered_map; + +std::string name_of(const array_ptr &arr, const array_names &names); + +void check_writable(const std::vector &arrays, + const array_names &names); +void check_c_contig(const std::vector &arrays, + const array_names &names); +void check_queue(const std::vector &arrays, + const array_names &names, + const sycl::queue &exec_q); + +void check_no_overlap(const array_ptr &inputs, + const array_ptr &outputs, + const array_names &names); +void check_no_overlap(const std::vector &inputs, + const std::vector &outputs, + const array_names &names); + +void check_num_dims(const array_ptr &arr, + const size_t ndim, + const array_names &names); +void check_max_dims(const array_ptr &arr, + const size_t max_ndim, + const array_names &names); + +void check_size_at_least(const array_ptr &arr, + const size_t size, + const array_names &names); + +void common_checks(const std::vector &inputs, + const std::vector &outputs, + const array_names &names); +} // namespace validation +} // namespace statistics diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 68aea4eee8d7..0d44a24dde7d 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -38,13 +38,19 @@ """ import dpctl.tensor as dpt +import dpctl.utils as dpu import numpy from dpctl.tensor._numpy_helper import normalize_axis_index import dpnp # pylint: disable=no-name-in-module -from .dpnp_algo import dpnp_correlate +import dpnp.backend.extensions.statistics._statistics_impl as statistics_ext +from dpnp.dpnp_utils.dpnp_utils_common import ( + result_type_for_device, + to_supported_dtypes, +) + from .dpnp_utils import call_origin, get_usm_allocations from .dpnp_utils.dpnp_utils_reduction import dpnp_wrap_reduction_call from .dpnp_utils.dpnp_utils_statistics import dpnp_cov, dpnp_median @@ -433,47 +439,181 @@ def corrcoef(x, y=None, rowvar=True, *, dtype=None): return out -def correlate(x1, x2, mode="valid"): - """ +def _get_padding(a_size, v_size, mode): + if v_size > a_size: + a_size, v_size = v_size, a_size + + if mode == "valid": + l_pad, r_pad = 0, 0 + elif mode == "same": + l_pad = v_size // 2 + r_pad = v_size - l_pad - 1 + elif mode == "full": + l_pad, r_pad = v_size - 1, v_size - 1 + else: + raise ValueError( + f"Unknown mode: {mode}. Only 'valid', 'same', 'full' are supported." + ) + + return l_pad, r_pad + + +def _run_native_sliding_dot_product1d(a, v, l_pad, r_pad): + queue = a.sycl_queue + + usm_type = dpu.get_coerced_usm_type([a.usm_type, v.usm_type]) + out_size = l_pad + r_pad + a.size - v.size + 1 + out = dpnp.empty( + shape=out_size, sycl_queue=queue, dtype=a.dtype, usm_type=usm_type + ) + + a_usm = dpnp.get_usm_ndarray(a) + v_usm = dpnp.get_usm_ndarray(v) + out_usm = dpnp.get_usm_ndarray(out) + + _manager = dpu.SequentialOrderManager[queue] + + mem_ev, corr_ev = statistics_ext.sliding_dot_product1d( + a_usm, + v_usm, + out_usm, + l_pad, + r_pad, + depends=_manager.submitted_events, + ) + _manager.add_event_pair(mem_ev, corr_ev) + + return out + + +def correlate(a, v, mode="valid"): + r""" Cross-correlation of two 1-dimensional sequences. + This function computes the correlation as generally defined in signal + processing texts [1]: + + .. 
math:: c_k = \sum_n a_{n+k} \cdot \overline{v}_n + + with a and v sequences being zero-padded where necessary and + :math:`\overline v` denoting complex conjugation. + For full documentation refer to :obj:`numpy.correlate`. - Limitations - ----------- - Input arrays are supported as :obj:`dpnp.ndarray`. - Size and shape of input arrays are supported to be equal. - Parameter `mode` is supported only with default value ``"valid"``. - Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + First input array. + v : {dpnp.ndarray, usm_ndarray} + Second input array. + mode : {'valid', 'same', 'full'}, optional + Refer to the :obj:`dpnp.convolve` docstring. Note that the default + is ``'valid'``, unlike :obj:`dpnp.convolve`, which uses ``'full'``. + + Default: ``'valid'``. + + Notes + ----- + The definition of correlation above is not unique and sometimes + correlation may be defined differently. Another common definition is [1]: + + .. math:: c'_k = \sum_n a_{n} \cdot \overline{v_{n+k}} + + which is related to :math:`c_k` by :math:`c'_k = c_{-k}`. + + References + ---------- + .. [1] Wikipedia, "Cross-correlation", + https://en.wikipedia.org/wiki/Cross-correlation + + Returns + ------- + out : {dpnp.ndarray} + Discrete cross-correlation of `a` and `v`. See Also -------- - :obj:`dpnp.convolve` : Discrete, linear convolution of - two one-dimensional sequences. + :obj:`dpnp.convolve` : Discrete, linear convolution of two + one-dimensional sequences. + Examples -------- >>> import dpnp as np - >>> x = np.correlate([1, 2, 3], [0, 1, 0.5]) - >>> [i for i in x] - [3.5] + >>> a = np.array([1, 2, 3], dtype=np.float32) + >>> v = np.array([0, 1, 0.5], dtype=np.float32) + >>> np.correlate(a, v) + array([3.5], dtype=float32) + >>> np.correlate(a, v, "same") + array([2. , 3.5, 3. ], dtype=float32) + >>> np.correlate([1, 2, 3], [0, 1, 0.5], "full") + array([0.5, 2. , 3.5, 3. , 0. ], dtype=float32) + + Using complex sequences: + + >>> ac = np.array([1+1j, 2, 3-1j], dtype=np.complex64) + >>> vc = np.array([0, 1, 0.5j], dtype=np.complex64) + >>> np.correlate(ac, vc, 'full') + array([0.5-0.5j, 1. +0.j , 1.5-1.5j, 3. -1.j , 0. +0.j ], dtype=complex64) + + Note that you get the time reversed, complex conjugated result + (:math:`\overline{c_{-k}}`) when the two input sequences a and v change + places: + + >>> np.correlate([0, 1, 0.5j], [1+1j, 2, 3-1j], 'full') + array([0. +0.j , 3. +1.j , 1.5+1.5j, 1. +0.j , 0.5+0.5j], dtype=complex64) """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) - if x1_desc and x2_desc: - if x1_desc.size != x2_desc.size or x1_desc.size == 0: - pass - elif x1_desc.shape != x2_desc.shape: - pass - elif mode != "valid": - pass - else: - return dpnp_correlate(x1_desc, x2_desc).get_pyobj() + dpnp.check_supported_arrays_type(a, v) + + if a.size == 0 or v.size == 0: + raise ValueError( + f"Array arguments cannot be empty. " + f"Received sizes: a.size={a.size}, v.size={v.size}" + ) + if a.ndim != 1 or v.ndim != 1: + raise ValueError( + f"Only 1-dimensional arrays are supported. 
" + f"Received shapes: a.shape={a.shape}, v.shape={v.shape}" + ) + + supported_types = statistics_ext.sliding_dot_product1d_dtypes() + + device = a.sycl_device + rdtype = result_type_for_device([a.dtype, v.dtype], device) + supported_dtype = to_supported_dtypes(rdtype, supported_types, device) + + if supported_dtype is None: + raise ValueError( + f"function '{correlate}' does not support input types " + f"({a.dtype}, {v.dtype}), " + "and the inputs could not be coerced to any " + f"supported types. List of supported types: {supported_types}" + ) + + if dpnp.issubdtype(v.dtype, dpnp.complexfloating): + v = dpnp.conj(v) + + revert = False + if v.size > a.size: + revert = True + a, v = v, a + + l_pad, r_pad = _get_padding(a.size, v.size, mode) + + a_casted = dpnp.asarray(a, dtype=supported_dtype, order="C") + v_casted = dpnp.asarray(v, dtype=supported_dtype, order="C") + + if v.size > a.size: + a_casted, v_casted = v_casted, a_casted + + r = _run_native_sliding_dot_product1d(a_casted, v_casted, l_pad, r_pad) + + if revert: + r = r[::-1] - return call_origin(numpy.correlate, x1, x2, mode=mode) + return dpnp.asarray(r, dtype=rdtype, order="C") def cov( diff --git a/dpnp/dpnp_utils/dpnp_utils_common.py b/dpnp/dpnp_utils/dpnp_utils_common.py new file mode 100644 index 000000000000..54e97d03c114 --- /dev/null +++ b/dpnp/dpnp_utils/dpnp_utils_common.py @@ -0,0 +1,81 @@ +# ***************************************************************************** +# Copyright (c) 2023-2024, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + + +from collections.abc import Iterable + +from dpctl.tensor._type_utils import _can_cast + +import dpnp +from dpnp.dpnp_utils import map_dtype_to_device + +__all__ = ["result_type_for_device", "to_supported_dtypes"] + + +def result_type_for_device(dtypes, device): + """Works as dpnp.result_type, but taking into account the device capabilities.""" + + rt = dpnp.result_type(*dtypes) + return map_dtype_to_device(rt, device) + + +def to_supported_dtypes(dtypes, supported_types, device): + """ + Convert input dtypes to the supported ones based on the device capabilities. 
+ Return types from the supported_types list that are compatible with the input dtypes. + If no compatible types are found, return None. + """ + + has_fp64 = device.has_aspect_fp64 + has_fp16 = device.has_aspect_fp16 + + def is_castable(dtype, stype): + return _can_cast(dtype, stype, has_fp16, has_fp64) + + if dtypes in supported_types: + return dtypes + + for stypes in supported_types: + if not isinstance(dtypes, Iterable): + if isinstance(stypes, Iterable): + raise ValueError( + "Input and supported types must have the same length" + ) + + if is_castable(dtypes, stypes): + return stypes + else: + if not isinstance(stypes, Iterable) or len(dtypes) != len(stypes): + raise ValueError( + "Input and supported types must have the same length" + ) + + if all( + is_castable(dtype, stype) + for dtype, stype in zip(dtypes, stypes) + ): + return stypes + + return None diff --git a/dpnp/tests/test_statistics.py b/dpnp/tests/test_statistics.py index 8dbccb379d9d..96e59d9bbede 100644 --- a/dpnp/tests/test_statistics.py +++ b/dpnp/tests/test_statistics.py @@ -628,6 +628,79 @@ def test_corrcoef_scalar(self): assert_dtype_allclose(result, expected) +class TestCorrelate: + @pytest.mark.parametrize( + "a, v", [([1], [1, 2, 3]), ([1, 2, 3], [1]), ([1, 2, 3], [1, 2])] + ) + @pytest.mark.parametrize("mode", [None, "full", "valid", "same"]) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_correlate(self, a, v, mode, dtype): + an = numpy.array(a, dtype=dtype) + vn = numpy.array(v, dtype=dtype) + ad = dpnp.array(an) + vd = dpnp.array(vn) + + if mode is None: + expected = numpy.correlate(an, vn) + result = dpnp.correlate(ad, vd) + else: + expected = numpy.correlate(an, vn, mode=mode) + result = dpnp.correlate(ad, vd, mode=mode) + + assert_dtype_allclose(result, expected) + + def test_correlate_mode_error(self): + a = dpnp.arange(5) + v = dpnp.arange(3) + + # invalid mode + with pytest.raises(ValueError): + dpnp.correlate(a, v, mode="unknown") + + @pytest.mark.parametrize("a, v", [([], [1]), ([1], []), ([], [])]) + def test_correlate_empty(self, a, v): + a = dpnp.asarray(a) + v = dpnp.asarray(v) + + with pytest.raises(ValueError): + dpnp.correlate(a, v) + + @pytest.mark.parametrize( + "a, v", + [ + ([[1, 2], [2, 3]], [1]), + ([1], [[1, 2], [2, 3]]), + ([[1, 2], [2, 3]], [[1, 2], [2, 3]]), + ], + ) + def test_correlate_shape_error(self, a, v): + a = dpnp.asarray(a) + v = dpnp.asarray(v) + + with pytest.raises(ValueError): + dpnp.correlate(a, v) + + @pytest.mark.parametrize("size", [2, 10**1, 10**2, 10**3, 10**4, 10**5]) + def test_correlate_different_sizes(self, size): + a = numpy.random.rand(size).astype(numpy.float32) + v = numpy.random.rand(size // 2).astype(numpy.float32) + + ad = dpnp.array(a) + vd = dpnp.array(v) + + expected = numpy.correlate(a, v) + result = dpnp.correlate(ad, vd) + + assert_dtype_allclose(result, expected, factor=20) + + def test_correlate_another_sycl_queue(self): + a = dpnp.arange(5, sycl_queue=dpctl.SyclQueue()) + v = dpnp.arange(3, sycl_queue=dpctl.SyclQueue()) + + with pytest.raises(ValueError): + dpnp.correlate(a, v) + + @pytest.mark.parametrize( "dtype", get_all_dtypes(no_bool=True, no_none=True, no_complex=True) ) diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index 0041fbff4752..ac96e5d04af9 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -726,6 +726,7 @@ def test_reduce_hypot(device): [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], [[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]], ), + pytest.param("correlate", [1, 
2, 3], [4, 5, 6]), pytest.param("cross", [1.0, 2.0, 3.0], [4.0, 5.0, 6.0]), pytest.param("digitize", [0.2, 6.4, 3.0], [0.0, 1.0, 2.5, 4.0]), pytest.param( diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py index 29c9419fe84d..9769a4d97ef9 100644 --- a/dpnp/tests/test_usm_type.py +++ b/dpnp/tests/test_usm_type.py @@ -696,6 +696,7 @@ def test_1in_1out(func, data, usm_type): [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], [[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]], ), + pytest.param("correlate", [1, 2, 3], [0, 1, 0.5]), # dpnp.dot has 3 different implementations based on input arrays dtype # checking all of them pytest.param("dot", [3.0, 4.0, 5.0], [1.0, 2.0, 3.0]), diff --git a/dpnp/tests/third_party/cupy/statistics_tests/test_correlation.py b/dpnp/tests/third_party/cupy/statistics_tests/test_correlation.py index f8d7feb75c73..e1ee53748f88 100644 --- a/dpnp/tests/third_party/cupy/statistics_tests/test_correlation.py +++ b/dpnp/tests/third_party/cupy/statistics_tests/test_correlation.py @@ -164,7 +164,6 @@ def test_cov_empty(self): } ) ) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestCorrelateShapeCombination(unittest.TestCase): @testing.for_all_dtypes(no_float16=True) @@ -176,7 +175,6 @@ def test_correlate(self, xp, dtype): @pytest.mark.parametrize("mode", ["valid", "full", "same"]) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestCorrelate: @testing.for_all_dtypes() diff --git a/tests_external/skipped_tests_numpy.tbl b/tests_external/skipped_tests_numpy.tbl index a1ca87fb4bdb..8553bb1faa3c 100644 --- a/tests_external/skipped_tests_numpy.tbl +++ b/tests_external/skipped_tests_numpy.tbl @@ -1022,7 +1022,6 @@ tests/test_numeric.py::TestClip::test_type_cast_11 tests/test_numeric.py::TestClip::test_type_cast_12 tests/test_numeric.py::TestConvolve::test_no_overwrite tests/test_numeric.py::TestConvolve::test_object -tests/test_numeric.py::TestCorrelate::test_object tests/test_numeric.py::TestCross::test_2x2 tests/test_numeric.py::TestCross::test_broadcasting tests/test_numeric.py::TestCross::test_broadcasting_shapes From 9130a5350396b68fc40d4e0f07b56a3fd6cccb3b Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Thu, 28 Nov 2024 04:42:30 +0100 Subject: [PATCH 2/5] Fix signed/unsigned compare warnings & small fixes --- .../extensions/statistics/validation_utils.cpp | 17 ++++++++++------- dpnp/dpnp_iface_statistics.py | 3 --- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dpnp/backend/extensions/statistics/validation_utils.cpp b/dpnp/backend/extensions/statistics/validation_utils.cpp index 195d5e4df253..d5b2ae381ea1 100644 --- a/dpnp/backend/extensions/statistics/validation_utils.cpp +++ b/dpnp/backend/extensions/statistics/validation_utils.cpp @@ -138,10 +138,11 @@ void check_num_dims(const array_ptr &arr, const size_t ndim, const array_names &names) { - if (arr != nullptr && arr->get_ndim() != ndim) { + size_t arr_n_dim = arr != nullptr ? arr->get_ndim() : 0; + if (arr != nullptr && arr_n_dim != ndim) { throw py::value_error("Array " + name_of(arr, names) + " must be " + std::to_string(ndim) + "D, but got " + - std::to_string(arr->get_ndim()) + "D."); + std::to_string(arr_n_dim) + "D."); } } @@ -149,11 +150,12 @@ void check_max_dims(const array_ptr &arr, const size_t max_ndim, const array_names &names) { - if (arr != nullptr && arr->get_ndim() > max_ndim) { + size_t arr_n_dim = arr != nullptr ? 
arr->get_ndim() : 0; + if (arr != nullptr && arr_n_dim > max_ndim) { throw py::value_error( "Array " + name_of(arr, names) + " must have no more than " + std::to_string(max_ndim) + " dimensions, but got " + - std::to_string(arr->get_ndim()) + " dimensions."); + std::to_string(arr_n_dim) + " dimensions."); } } @@ -161,11 +163,12 @@ void check_size_at_least(const array_ptr &arr, const size_t size, const array_names &names) { - if (arr != nullptr && arr->get_size() < size) { + size_t arr_size = arr != nullptr ? arr->get_size() : 0; + if (arr != nullptr && arr_size < size) { throw py::value_error("Array " + name_of(arr, names) + " must have at least " + std::to_string(size) + - " elements, but got " + - std::to_string(arr->get_size()) + " elements."); + " elements, but got " + std::to_string(arr_size) + + " elements."); } } diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 0d44a24dde7d..3cd04561473e 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -605,9 +605,6 @@ def correlate(a, v, mode="valid"): a_casted = dpnp.asarray(a, dtype=supported_dtype, order="C") v_casted = dpnp.asarray(v, dtype=supported_dtype, order="C") - if v.size > a.size: - a_casted, v_casted = v_casted, a_casted - r = _run_native_sliding_dot_product1d(a_casted, v_casted, l_pad, r_pad) if revert: From 943726f4c86ac571fa1112b87a3b017d1aa3638a Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Sat, 7 Dec 2024 17:11:56 +0100 Subject: [PATCH 3/5] Apply review comments --- .../extensions/statistics/bincount.hpp | 7 +--- dpnp/backend/extensions/statistics/common.cpp | 8 +--- dpnp/backend/extensions/statistics/common.hpp | 7 +--- .../extensions/statistics/dispatch_table.hpp | 8 +--- .../extensions/statistics/histogram.cpp | 4 +- .../extensions/statistics/histogram.hpp | 9 ++--- .../statistics/histogram_common.cpp | 3 -- .../statistics/histogram_common.hpp | 8 ---- .../statistics/sliding_dot_product1d.cpp | 10 ++--- .../statistics/sliding_dot_product1d.hpp | 10 +---- .../statistics/sliding_window1d.cpp | 3 -- .../statistics/sliding_window1d.hpp | 10 ----- .../statistics/validation_utils.cpp | 7 +--- .../statistics/validation_utils.hpp | 7 +--- dpnp/dpnp_iface_statistics.py | 37 +++++++++---------- 15 files changed, 40 insertions(+), 98 deletions(-) diff --git a/dpnp/backend/extensions/statistics/bincount.hpp b/dpnp/backend/extensions/statistics/bincount.hpp index 70a17431383f..f904bb02cbe0 100644 --- a/dpnp/backend/extensions/statistics/bincount.hpp +++ b/dpnp/backend/extensions/statistics/bincount.hpp @@ -32,9 +32,7 @@ namespace dpctl_td_ns = dpctl::tensor::type_dispatch; -namespace statistics -{ -namespace histogram +namespace statistics::histogram { struct Bincount { @@ -62,5 +60,4 @@ struct Bincount }; void populate_bincount(py::module_ m); -} // namespace histogram -} // namespace statistics +} // namespace statistics::histogram diff --git a/dpnp/backend/extensions/statistics/common.cpp b/dpnp/backend/extensions/statistics/common.cpp index 961205293725..7238c8ebbfe6 100644 --- a/dpnp/backend/extensions/statistics/common.cpp +++ b/dpnp/backend/extensions/statistics/common.cpp @@ -29,11 +29,8 @@ namespace dpctl_td_ns = dpctl::tensor::type_dispatch; -namespace statistics +namespace statistics::common { -namespace common -{ - size_t get_max_local_size(const sycl::device &device) { constexpr const int default_max_cpu_local_size = 256; @@ -120,5 +117,4 @@ pybind11::dtype dtype_from_typenum(int dst_typenum) } } -} // namespace common -} // namespace statistics +} 
// namespace statistics::common diff --git a/dpnp/backend/extensions/statistics/common.hpp b/dpnp/backend/extensions/statistics/common.hpp index 99a31002ea13..26e49bce8e7c 100644 --- a/dpnp/backend/extensions/statistics/common.hpp +++ b/dpnp/backend/extensions/statistics/common.hpp @@ -36,9 +36,7 @@ #include "utils/math_utils.hpp" // clang-format on -namespace statistics -{ -namespace common +namespace statistics::common { template @@ -200,5 +198,4 @@ sycl::nd_range<1> // headers of dpctl. pybind11::dtype dtype_from_typenum(int dst_typenum); -} // namespace common -} // namespace statistics +} // namespace statistics::common diff --git a/dpnp/backend/extensions/statistics/dispatch_table.hpp b/dpnp/backend/extensions/statistics/dispatch_table.hpp index 42c170c53617..d0219ecd50e9 100644 --- a/dpnp/backend/extensions/statistics/dispatch_table.hpp +++ b/dpnp/backend/extensions/statistics/dispatch_table.hpp @@ -39,11 +39,8 @@ namespace dpctl_td_ns = dpctl::tensor::type_dispatch; namespace py = pybind11; -namespace statistics +namespace statistics::common { -namespace common -{ - template struct one_of { @@ -386,5 +383,4 @@ class DispatchTable2 Table2 table; }; -} // namespace common -} // namespace statistics +} // namespace statistics::common diff --git a/dpnp/backend/extensions/statistics/histogram.cpp b/dpnp/backend/extensions/statistics/histogram.cpp index 848f89451236..cd337718a272 100644 --- a/dpnp/backend/extensions/statistics/histogram.cpp +++ b/dpnp/backend/extensions/statistics/histogram.cpp @@ -26,9 +26,7 @@ #include #include #include -#include -#include -#include +#include #include #include diff --git a/dpnp/backend/extensions/statistics/histogram.hpp b/dpnp/backend/extensions/statistics/histogram.hpp index e89737c12946..7df877428aa1 100644 --- a/dpnp/backend/extensions/statistics/histogram.hpp +++ b/dpnp/backend/extensions/statistics/histogram.hpp @@ -29,11 +29,9 @@ #include "dispatch_table.hpp" -namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +// namespace dpctl_td_ns = dpctl::tensor::type_dispatch; -namespace statistics -{ -namespace histogram +namespace statistics::histogram { struct Histogram { @@ -59,5 +57,4 @@ struct Histogram }; void populate_histogram(py::module_ m); -} // namespace histogram -} // namespace statistics +} // namespace statistics::histogram diff --git a/dpnp/backend/extensions/statistics/histogram_common.cpp b/dpnp/backend/extensions/statistics/histogram_common.cpp index af47fe633674..65eba05be592 100644 --- a/dpnp/backend/extensions/statistics/histogram_common.cpp +++ b/dpnp/backend/extensions/statistics/histogram_common.cpp @@ -26,12 +26,9 @@ #include #include #include -#include #include #include "dpctl4pybind11.hpp" -#include "utils/memory_overlap.hpp" -#include "utils/output_validation.hpp" #include "utils/type_dispatch.hpp" #include diff --git a/dpnp/backend/extensions/statistics/histogram_common.hpp b/dpnp/backend/extensions/statistics/histogram_common.hpp index e7503fe98778..9a89ae340a33 100644 --- a/dpnp/backend/extensions/statistics/histogram_common.hpp +++ b/dpnp/backend/extensions/statistics/histogram_common.hpp @@ -29,14 +29,6 @@ #include "common.hpp" -namespace dpctl -{ -namespace tensor -{ -class usm_ndarray; -} -} // namespace dpctl - using dpctl::tensor::usm_ndarray; namespace statistics diff --git a/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp b/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp index ddc501bc13be..de3dafa2087d 100644 --- a/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp +++ 
b/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp @@ -23,12 +23,8 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** -#include #include #include -#include -#include -#include #include #include @@ -42,7 +38,7 @@ #include "sliding_dot_product1d.hpp" #include "sliding_window1d.hpp" -#include +// #include namespace dpctl_td_ns = dpctl::tensor::type_dispatch; using dpctl::tensor::usm_ndarray; @@ -101,7 +97,9 @@ struct SlidingDotProductF } }; -using SupportedTypes = std::tuple #include -namespace dpctl_td_ns = dpctl::tensor::type_dispatch; - -namespace statistics -{ -namespace sliding_window1d +namespace statistics::sliding_window1d { struct SlidingDotProduct1d { @@ -62,5 +57,4 @@ struct SlidingDotProduct1d }; void populate_sliding_dot_product1d(py::module_ m); -} // namespace sliding_window1d -} // namespace statistics +} // namespace statistics::sliding_window1d diff --git a/dpnp/backend/extensions/statistics/sliding_window1d.cpp b/dpnp/backend/extensions/statistics/sliding_window1d.cpp index bf7ec2424c8e..412f272353a8 100644 --- a/dpnp/backend/extensions/statistics/sliding_window1d.cpp +++ b/dpnp/backend/extensions/statistics/sliding_window1d.cpp @@ -23,10 +23,7 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** -#include -#include #include -#include #include #include "dpctl4pybind11.hpp" diff --git a/dpnp/backend/extensions/statistics/sliding_window1d.hpp b/dpnp/backend/extensions/statistics/sliding_window1d.hpp index c60c9a52adb9..074a780c1029 100644 --- a/dpnp/backend/extensions/statistics/sliding_window1d.hpp +++ b/dpnp/backend/extensions/statistics/sliding_window1d.hpp @@ -26,23 +26,13 @@ #pragma once #include "utils/math_utils.hpp" -#include #include -#include #include #include #include "common.hpp" -namespace dpctl -{ -namespace tensor -{ -class usm_ndarray; -} -} // namespace dpctl - using dpctl::tensor::usm_ndarray; namespace statistics diff --git a/dpnp/backend/extensions/statistics/validation_utils.cpp b/dpnp/backend/extensions/statistics/validation_utils.cpp index d5b2ae381ea1..1a3415543b51 100644 --- a/dpnp/backend/extensions/statistics/validation_utils.cpp +++ b/dpnp/backend/extensions/statistics/validation_utils.cpp @@ -53,9 +53,7 @@ sycl::queue get_queue(const std::vector &inputs, } } // namespace -namespace statistics -{ -namespace validation +namespace statistics::validation { std::string name_of(const array_ptr &arr, const array_names &names) { @@ -189,5 +187,4 @@ void common_checks(const std::vector &inputs, check_no_overlap(inputs, outputs, names); } -} // namespace validation -} // namespace statistics +} // namespace statistics::validation diff --git a/dpnp/backend/extensions/statistics/validation_utils.hpp b/dpnp/backend/extensions/statistics/validation_utils.hpp index 845cbc9d9928..a1e0ff521ce6 100644 --- a/dpnp/backend/extensions/statistics/validation_utils.hpp +++ b/dpnp/backend/extensions/statistics/validation_utils.hpp @@ -31,9 +31,7 @@ #include "dpctl4pybind11.hpp" -namespace statistics -{ -namespace validation +namespace statistics::validation { using array_ptr = const dpctl::tensor::usm_ndarray *; using array_names = std::unordered_map; @@ -69,5 +67,4 @@ void check_size_at_least(const array_ptr &arr, void common_checks(const std::vector &inputs, const std::vector &outputs, const array_names &names); -} // namespace validation -} // namespace statistics +} // namespace statistics::validation diff --git 
a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 3cd04561473e..b82a74e2f95f 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -440,8 +440,7 @@ def corrcoef(x, y=None, rowvar=True, *, dtype=None): def _get_padding(a_size, v_size, mode): - if v_size > a_size: - a_size, v_size = v_size, a_size + assert v_size <= a_size if mode == "valid": l_pad, r_pad = 0, 0 @@ -463,9 +462,8 @@ def _run_native_sliding_dot_product1d(a, v, l_pad, r_pad): usm_type = dpu.get_coerced_usm_type([a.usm_type, v.usm_type]) out_size = l_pad + r_pad + a.size - v.size + 1 - out = dpnp.empty( - shape=out_size, sycl_queue=queue, dtype=a.dtype, usm_type=usm_type - ) + # out type is the same as input type + out = dpnp.empty_like(a, shape=out_size, usm_type=usm_type) a_usm = dpnp.get_usm_ndarray(a) v_usm = dpnp.get_usm_ndarray(v) @@ -491,11 +489,11 @@ def correlate(a, v, mode="valid"): Cross-correlation of two 1-dimensional sequences. This function computes the correlation as generally defined in signal - processing texts [1]: + processing texts [1]_: .. math:: c_k = \sum_n a_{n+k} \cdot \overline{v}_n - with a and v sequences being zero-padded where necessary and + with `a` and `v` sequences being zero-padded where necessary and :math:`\overline v` denoting complex conjugation. For full documentation refer to :obj:`numpy.correlate`. @@ -506,16 +504,16 @@ def correlate(a, v, mode="valid"): First input array. v : {dpnp.ndarray, usm_ndarray} Second input array. - mode : {'valid', 'same', 'full'}, optional + mode : {"valid", "same", "full"}, optional Refer to the :obj:`dpnp.convolve` docstring. Note that the default - is ``'valid'``, unlike :obj:`dpnp.convolve`, which uses ``'full'``. + is ``"valid"``, unlike :obj:`dpnp.convolve`, which uses ``"full"``. - Default: ``'valid'``. + Default: ``"valid"``. Notes ----- The definition of correlation above is not unique and sometimes - correlation may be defined differently. Another common definition is [1]: + correlation may be defined differently. Another common definition is [1]_: .. math:: c'_k = \sum_n a_{n} \cdot \overline{v_{n+k}} @@ -533,8 +531,8 @@ def correlate(a, v, mode="valid"): See Also -------- - :obj:`dpnp.convolve` : Discrete, linear convolution of two - one-dimensional sequences. + :obj:`dpnp.convolve` : Discrete, linear convolution of two one-dimensional + sequences. Examples @@ -546,7 +544,7 @@ def correlate(a, v, mode="valid"): array([3.5], dtype=float32) >>> np.correlate(a, v, "same") array([2. , 3.5, 3. ], dtype=float32) - >>> np.correlate([1, 2, 3], [0, 1, 0.5], "full") + >>> np.correlate([a, v, "full") array([0.5, 2. , 3.5, 3. , 0. ], dtype=float32) Using complex sequences: @@ -557,10 +555,10 @@ def correlate(a, v, mode="valid"): array([0.5-0.5j, 1. +0.j , 1.5-1.5j, 3. -1.j , 0. +0.j ], dtype=complex64) Note that you get the time reversed, complex conjugated result - (:math:`\overline{c_{-k}}`) when the two input sequences a and v change + (:math:`\overline{c_{-k}}`) when the two input sequences `a` and `v` change places: - >>> np.correlate([0, 1, 0.5j], [1+1j, 2, 3-1j], 'full') + >>> np.correlate(vc, ac, 'full') array([0. +0.j , 3. +1.j , 1.5+1.5j, 1. 
+0.j , 0.5+0.5j], dtype=complex64) """ @@ -586,10 +584,11 @@ def correlate(a, v, mode="valid"): if supported_dtype is None: raise ValueError( - f"function '{correlate}' does not support input types " - f"({a.dtype}, {v.dtype}), " + f"function does not support input types " + f"({a.dtype.name}, {v.dtype.name}), " "and the inputs could not be coerced to any " - f"supported types. List of supported types: {supported_types}" + f"supported types. List of supported types: " + f"{[st.name for st in supported_types]}" ) if dpnp.issubdtype(v.dtype, dpnp.complexfloating): From a4e627df3d5d8aa4aaf8da5241d295914fdcdb76 Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Mon, 9 Dec 2024 18:07:28 +0100 Subject: [PATCH 4/5] Apply review comments 2 --- dpnp/dpnp_iface_statistics.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index b82a74e2f95f..76d6c16defd1 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -510,6 +510,11 @@ def correlate(a, v, mode="valid"): Default: ``"valid"``. + Returns + ------- + out : dpnp.ndarray + Discrete cross-correlation of `a` and `v`. + Notes ----- The definition of correlation above is not unique and sometimes @@ -524,11 +529,6 @@ def correlate(a, v, mode="valid"): .. [1] Wikipedia, "Cross-correlation", https://en.wikipedia.org/wiki/Cross-correlation - Returns - ------- - out : {dpnp.ndarray} - Discrete cross-correlation of `a` and `v`. - See Also -------- :obj:`dpnp.convolve` : Discrete, linear convolution of two one-dimensional From bb795fbb561cd441e92cdcf8ad6a2e2f917bcbe2 Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Mon, 9 Dec 2024 18:11:40 +0100 Subject: [PATCH 5/5] Apply review comments 3 --- dpnp/backend/extensions/statistics/histogram.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/dpnp/backend/extensions/statistics/histogram.hpp b/dpnp/backend/extensions/statistics/histogram.hpp index a1eeb191884d..c7cf5aeb1b80 100644 --- a/dpnp/backend/extensions/statistics/histogram.hpp +++ b/dpnp/backend/extensions/statistics/histogram.hpp @@ -31,8 +31,6 @@ #include "dispatch_table.hpp" #include "dpctl4pybind11.hpp" -// namespace dpctl_td_ns = dpctl::tensor::type_dispatch; - namespace statistics::histogram { struct Histogram
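
Illustrative usage of the new `dpnp.correlate` across the three supported modes. This is a sketch assembled from the docstring examples introduced by this patch series; the inline output-length comments reflect standard NumPy correlate mode semantics, which the patch mirrors, and are not additional behavior defined here.

>>> import dpnp as np
>>> a = np.array([1, 2, 3], dtype=np.float32)
>>> v = np.array([0, 1, 0.5], dtype=np.float32)
>>> np.correlate(a, v)                # "valid": output size a.size - v.size + 1 == 1
array([3.5], dtype=float32)
>>> np.correlate(a, v, mode="same")   # "same": output size max(a.size, v.size) == 3
array([2. , 3.5, 3. ], dtype=float32)
>>> np.correlate(a, v, mode="full")   # "full": output size a.size + v.size - 1 == 5
array([0.5, 2. , 3.5, 3. , 0. ], dtype=float32)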