From 4e2640398c124b06c30d261b0b6c3717c25edd3a Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Tue, 22 Oct 2024 17:53:10 +0200 Subject: [PATCH 1/5] Implementation of correlate --- .../extensions/statistics/CMakeLists.txt | 5 +- .../extensions/statistics/dispatch_table.hpp | 98 +++ .../statistics/histogram_common.cpp | 140 ++-- .../statistics/sliding_dot_product1d.cpp | 170 +++++ .../statistics/sliding_dot_product1d.hpp | 66 ++ .../statistics/sliding_window1d.cpp | 96 +++ .../statistics/sliding_window1d.hpp | 660 ++++++++++++++++++ .../extensions/statistics/statistics_py.cpp | 2 + .../statistics/validation_utils.cpp | 190 +++++ .../statistics/validation_utils.hpp | 73 ++ dpnp/dpnp_iface_statistics.py | 194 ++++- dpnp/dpnp_utils/dpnp_utils_common.py | 81 +++ dpnp/tests/test_statistics.py | 73 ++ dpnp/tests/test_sycl_queue.py | 1 + dpnp/tests/test_usm_type.py | 1 + .../cupy/statistics_tests/test_correlation.py | 2 - tests_external/skipped_tests_numpy.tbl | 1 - 17 files changed, 1722 insertions(+), 131 deletions(-) create mode 100644 dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp create mode 100644 dpnp/backend/extensions/statistics/sliding_dot_product1d.hpp create mode 100644 dpnp/backend/extensions/statistics/sliding_window1d.cpp create mode 100644 dpnp/backend/extensions/statistics/sliding_window1d.hpp create mode 100644 dpnp/backend/extensions/statistics/validation_utils.cpp create mode 100644 dpnp/backend/extensions/statistics/validation_utils.hpp create mode 100644 dpnp/dpnp_utils/dpnp_utils_common.py diff --git a/dpnp/backend/extensions/statistics/CMakeLists.txt b/dpnp/backend/extensions/statistics/CMakeLists.txt index 26dda2800086..dcec77ae603a 100644 --- a/dpnp/backend/extensions/statistics/CMakeLists.txt +++ b/dpnp/backend/extensions/statistics/CMakeLists.txt @@ -26,11 +26,14 @@ set(python_module_name _statistics_impl) set(_module_src - ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/bincount.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/histogram.cpp ${CMAKE_CURRENT_SOURCE_DIR}/histogram_common.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/sliding_dot_product1d.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/sliding_window1d.cpp ${CMAKE_CURRENT_SOURCE_DIR}/statistics_py.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/validation_utils.cpp ) pybind11_add_module(${python_module_name} MODULE ${_module_src}) diff --git a/dpnp/backend/extensions/statistics/dispatch_table.hpp b/dpnp/backend/extensions/statistics/dispatch_table.hpp index ce11de891318..42c170c53617 100644 --- a/dpnp/backend/extensions/statistics/dispatch_table.hpp +++ b/dpnp/backend/extensions/statistics/dispatch_table.hpp @@ -97,6 +97,32 @@ using DTypePair = std::pair; using SupportedDTypeList = std::vector; using SupportedDTypeList2 = std::vector; +template + typename Func> +struct TableBuilder +{ + template + struct impl + { + static constexpr bool is_defined = one_of_v; + + _FnT get() + { + if constexpr (is_defined) { + return Func::impl; + } + else { + return nullptr; + } + } + }; + + using type = + dpctl_td_ns::DispatchVectorBuilder; +}; + template @@ -124,6 +150,78 @@ struct TableBuilder2 dpctl_td_ns::DispatchTableBuilder; }; +template +class DispatchTable +{ +public: + DispatchTable(std::string name) : name(name) {} + + template typename Func> + void populate_dispatch_table() + { + using TBulder = typename TableBuilder::type; + TBulder builder; + + builder.populate_dispatch_vector(table); + populate_supported_types(); + } + + FnT get_unsafe(int _typenum) const + { + auto array_types = 
dpctl_td_ns::usm_ndarray_types(); + const int type_id = array_types.typenum_to_lookup_id(_typenum); + + return table[type_id]; + } + + FnT get(int _typenum) const + { + auto fn = get_unsafe(_typenum); + + if (fn == nullptr) { + auto array_types = dpctl_td_ns::usm_ndarray_types(); + const int _type_id = array_types.typenum_to_lookup_id(_typenum); + + py::dtype _dtype = dtype_from_typenum(_type_id); + auto _type_pos = std::find(supported_types.begin(), + supported_types.end(), _dtype); + if (_type_pos == supported_types.end()) { + py::str types = py::str(py::cast(supported_types)); + py::str dtype = py::str(_dtype); + + py::str err_msg = + py::str("'" + name + "' has unsupported type '") + dtype + + py::str("'." + " Supported types are: ") + + types; + + throw py::value_error(static_cast(err_msg)); + } + } + + return fn; + } + + const SupportedDTypeList &get_all_supported_types() const + { + return supported_types; + } + +private: + void populate_supported_types() + { + for (int i = 0; i < dpctl_td_ns::num_types; ++i) { + if (table[i] != nullptr) { + supported_types.emplace_back(dtype_from_typenum(i)); + } + } + } + + std::string name; + SupportedDTypeList supported_types; + Table table; +}; + template class DispatchTable2 { diff --git a/dpnp/backend/extensions/statistics/histogram_common.cpp b/dpnp/backend/extensions/statistics/histogram_common.cpp index e2445b78bb3f..af47fe633674 100644 --- a/dpnp/backend/extensions/statistics/histogram_common.cpp +++ b/dpnp/backend/extensions/statistics/histogram_common.cpp @@ -38,6 +38,8 @@ #include "histogram_common.hpp" +#include "validation_utils.hpp" + namespace dpctl_td_ns = dpctl::tensor::type_dispatch; using dpctl::tensor::usm_ndarray; using dpctl_td_ns::typenum_t; @@ -46,6 +48,15 @@ namespace statistics { using common::CeilDiv; +using validation::array_names; +using validation::array_ptr; + +using validation::check_max_dims; +using validation::check_num_dims; +using validation::check_size_at_least; +using validation::common_checks; +using validation::name_of; + namespace histogram { @@ -55,11 +66,9 @@ void validate(const usm_ndarray &sample, const usm_ndarray &histogram) { auto exec_q = sample.get_queue(); - using array_ptr = const usm_ndarray *; std::vector arrays{&sample, &histogram}; - std::unordered_map names = { - {arrays[0], "sample"}, {arrays[1], "histogram"}}; + array_names names = {{arrays[0], "sample"}, {arrays[1], "histogram"}}; array_ptr bins_ptr = nullptr; @@ -77,117 +86,48 @@ void validate(const usm_ndarray &sample, names.insert({weights_ptr, "weights"}); } - auto get_name = [&](const array_ptr &arr) { - auto name_it = names.find(arr); - assert(name_it != names.end()); - - return "'" + name_it->second + "'"; - }; - - dpctl::tensor::validation::CheckWritable::throw_if_not_writable(histogram); - - auto unequal_queue = - std::find_if(arrays.cbegin(), arrays.cend(), [&](const array_ptr &arr) { - return arr->get_queue() != exec_q; - }); - - if (unequal_queue != arrays.cend()) { - throw py::value_error( - get_name(*unequal_queue) + - " parameter has incompatible queue with parameter " + - get_name(&sample)); - } - - auto non_contig_array = - std::find_if(arrays.cbegin(), arrays.cend(), [&](const array_ptr &arr) { - return !arr->is_c_contiguous(); - }); + common_checks({&sample, bins.has_value() ? &bins.value() : nullptr, + weights.has_value() ? 
&weights.value() : nullptr}, + {&histogram}, names); - if (non_contig_array != arrays.cend()) { - throw py::value_error(get_name(*non_contig_array) + - " parameter is not c-contiguos"); - } + check_size_at_least(bins_ptr, 2, names); - auto check_overlaping = [&](const array_ptr &first, - const array_ptr &second) { - if (first == nullptr || second == nullptr) { - return; - } - - const auto &overlap = dpctl::tensor::overlap::MemoryOverlap(); - - if (overlap(*first, *second)) { - throw py::value_error(get_name(first) + - " has overlapping memory segments with " + - get_name(second)); - } - }; - - check_overlaping(&sample, &histogram); - check_overlaping(bins_ptr, &histogram); - check_overlaping(weights_ptr, &histogram); - - if (bins_ptr && bins_ptr->get_size() < 2) { - throw py::value_error(get_name(bins_ptr) + - " parameter must have at least 2 elements"); - } - - if (histogram.get_size() < 1) { - throw py::value_error(get_name(&histogram) + - " parameter must have at least 1 element"); - } - - if (histogram.get_ndim() != 1) { - throw py::value_error(get_name(&histogram) + - " parameter must be 1d. Actual " + - std::to_string(histogram.get_ndim()) + "d"); - } + check_size_at_least(&histogram, 1, names); + check_num_dims(&histogram, 1, names); if (weights_ptr) { - if (weights_ptr->get_ndim() != 1) { - throw py::value_error( - get_name(weights_ptr) + " parameter must be 1d. Actual " + - std::to_string(weights_ptr->get_ndim()) + "d"); - } + check_num_dims(weights_ptr, 1, names); auto sample_size = sample.get_size(); auto weights_size = weights_ptr->get_size(); if (sample.get_size() != weights_ptr->get_size()) { - throw py::value_error( - get_name(&sample) + " size (" + std::to_string(sample_size) + - ") and " + get_name(weights_ptr) + " size (" + - std::to_string(weights_size) + ")" + " must match"); + throw py::value_error(name_of(&sample, names) + " size (" + + std::to_string(sample_size) + ") and " + + name_of(weights_ptr, names) + " size (" + + std::to_string(weights_size) + ")" + + " must match"); } } - if (sample.get_ndim() > 2) { - throw py::value_error( - get_name(&sample) + - " parameter must have no more than 2 dimensions. Actual " + - std::to_string(sample.get_ndim()) + "d"); - } + check_max_dims(&sample, 2, names); if (sample.get_ndim() == 1) { - if (bins_ptr != nullptr && bins_ptr->get_ndim() != 1) { - throw py::value_error(get_name(&sample) + " parameter is 1d, but " + - get_name(bins_ptr) + " is " + - std::to_string(bins_ptr->get_ndim()) + "d"); - } + check_num_dims(bins_ptr, 1, names); } else if (sample.get_ndim() == 2) { auto sample_count = sample.get_shape(0); auto expected_dims = sample.get_shape(1); if (bins_ptr != nullptr && bins_ptr->get_ndim() != expected_dims) { - throw py::value_error(get_name(&sample) + " parameter has shape {" + - std::to_string(sample_count) + "x" + - std::to_string(expected_dims) + "}" + - ", so " + get_name(bins_ptr) + - " parameter expected to be " + - std::to_string(expected_dims) + - "d. " - "Actual " + - std::to_string(bins->get_ndim()) + "d"); + throw py::value_error( + name_of(&sample, names) + " parameter has shape {" + + std::to_string(sample_count) + "x" + + std::to_string(expected_dims) + "}" + ", so " + + name_of(bins_ptr, names) + " parameter expected to be " + + std::to_string(expected_dims) + + "d. 
" + "Actual " + + std::to_string(bins->get_ndim()) + "d"); } } @@ -199,9 +139,9 @@ void validate(const usm_ndarray &sample, if (histogram.get_size() != expected_hist_size) { throw py::value_error( - get_name(&histogram) + " and " + get_name(bins_ptr) + - " shape mismatch. " + get_name(&histogram) + - " expected to have size = " + + name_of(&histogram, names) + " and " + + name_of(bins_ptr, names) + " shape mismatch. " + + name_of(&histogram, names) + " expected to have size = " + std::to_string(expected_hist_size) + ". Actual " + std::to_string(histogram.get_size())); } @@ -209,7 +149,7 @@ void validate(const usm_ndarray &sample, int64_t max_hist_size = std::numeric_limits::max() - 1; if (histogram.get_size() > max_hist_size) { - throw py::value_error(get_name(&histogram) + + throw py::value_error(name_of(&histogram, names) + " parameter size expected to be less than " + std::to_string(max_hist_size) + ". Actual " + std::to_string(histogram.get_size())); @@ -225,7 +165,7 @@ void validate(const usm_ndarray &sample, if (!_64bit_atomics) { auto device_name = device.get_info(); throw py::value_error( - get_name(&histogram) + + name_of(&histogram, names) + " parameter has 64-bit type, but 64-bit atomics " + " are not supported for " + device_name); } diff --git a/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp b/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp new file mode 100644 index 000000000000..ddc501bc13be --- /dev/null +++ b/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp @@ -0,0 +1,170 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +// dpctl tensor headers +#include "dpctl4pybind11.hpp" +#include "utils/type_dispatch.hpp" + +#include "common.hpp" +#include "sliding_dot_product1d.hpp" +#include "sliding_window1d.hpp" + +#include + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +using dpctl::tensor::usm_ndarray; + +using namespace statistics::sliding_window1d; +using namespace statistics::common; + +namespace +{ + +template +struct SlidingDotProductF +{ + static sycl::event impl(sycl::queue &exec_q, + const void *v_ain, + const void *v_vin, + void *v_out, + const size_t a_size, + const size_t v_size, + const size_t l_pad, + const size_t r_pad, + const std::vector &depends) + { + const T *ain = static_cast(v_ain); + const T *vin = static_cast(v_vin); + T *out = static_cast(v_out); + + auto device = exec_q.get_device(); + const auto local_size = get_max_local_size(device); + const auto work_size = l_pad + r_pad + a_size - v_size + 1; + + constexpr int32_t WorkPI = 4; + + return exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + auto input = make_padded_span(ain, a_size, l_pad); + auto kernel = make_span(vin, v_size); + auto result = make_span(out, work_size); + + if (v_size > 8) { + const auto nd_range = + make_ndrange(work_size, local_size, WorkPI); + submit_sliding_window1d( + input, kernel, std::multiplies<>{}, std::plus<>{}, result, + nd_range, cgh); + } + else { + const auto nd_range = + make_ndrange(work_size, local_size, WorkPI); + submit_sliding_window1d_small_kernel( + input, kernel, std::multiplies<>{}, std::plus<>{}, result, + nd_range, cgh); + } + }); + } +}; + +using SupportedTypes = std::tuple, + std::complex>; +} // namespace + +SlidingDotProduct1d::SlidingDotProduct1d() : dispatch_table("a") +{ + dispatch_table + .populate_dispatch_table(); +} + +std::tuple + SlidingDotProduct1d::call(const dpctl::tensor::usm_ndarray &a, + const dpctl::tensor::usm_ndarray &v, + dpctl::tensor::usm_ndarray &out, + const size_t l_pad, + const size_t r_pad, + const std::vector &depends) +{ + validate(a, v, out, l_pad, r_pad); + + const int a_typenum = a.get_typenum(); + + auto corr_func = dispatch_table.get(a_typenum); + + auto exec_q = a.get_queue(); + + auto ev = corr_func(exec_q, a.get_data(), v.get_data(), out.get_data(), + a.get_shape(0), v.get_shape(0), l_pad, r_pad, depends); + + sycl::event args_ev; + args_ev = dpctl::utils::keep_args_alive(exec_q, {a, v, out}, {ev}); + + return {args_ev, ev}; +} + +std::unique_ptr sdp; + +void statistics::sliding_window1d::populate_sliding_dot_product1d(py::module_ m) +{ + using namespace std::placeholders; + + sdp.reset(new SlidingDotProduct1d()); + + auto sdp_func = [sdpp = + sdp.get()](const dpctl::tensor::usm_ndarray &a, + const dpctl::tensor::usm_ndarray &v, + dpctl::tensor::usm_ndarray &out, + const size_t l_pad, const size_t r_pad, + const std::vector &depends) { + return sdpp->call(a, v, out, l_pad, r_pad, depends); + }; + + m.def("sliding_dot_product1d", sdp_func, "1d sliding dot product.", + py::arg("a"), py::arg("v"), py::arg("out"), py::arg("l_pad"), + py::arg("r_pad"), py::arg("depends") = py::list()); + + auto sdp_dtypes = [sdpp = sdp.get()]() { + return sdpp->dispatch_table.get_all_supported_types(); + }; + + m.def("sliding_dot_product1d_dtypes", sdp_dtypes, + "Get the supported data types for sliding_dot_product1d_dtypes."); +} diff --git 
a/dpnp/backend/extensions/statistics/sliding_dot_product1d.hpp b/dpnp/backend/extensions/statistics/sliding_dot_product1d.hpp new file mode 100644 index 000000000000..4081dc94a3ee --- /dev/null +++ b/dpnp/backend/extensions/statistics/sliding_dot_product1d.hpp @@ -0,0 +1,66 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include "dispatch_table.hpp" +#include "utils/type_dispatch.hpp" +#include +#include + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; + +namespace statistics +{ +namespace sliding_window1d +{ +struct SlidingDotProduct1d +{ + using FnT = sycl::event (*)(sycl::queue &, + const void *, + const void *, + void *, + const size_t, + const size_t, + const size_t, + const size_t, + const std::vector &); + + common::DispatchTable dispatch_table; + + SlidingDotProduct1d(); + + std::tuple + call(const dpctl::tensor::usm_ndarray &a, + const dpctl::tensor::usm_ndarray &v, + dpctl::tensor::usm_ndarray &output, + const size_t l_pad, + const size_t r_pad, + const std::vector &depends); +}; + +void populate_sliding_dot_product1d(py::module_ m); +} // namespace sliding_window1d +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/sliding_window1d.cpp b/dpnp/backend/extensions/statistics/sliding_window1d.cpp new file mode 100644 index 000000000000..bf7ec2424c8e --- /dev/null +++ b/dpnp/backend/extensions/statistics/sliding_window1d.cpp @@ -0,0 +1,96 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include +#include +#include +#include + +#include "dpctl4pybind11.hpp" +#include "utils/type_dispatch.hpp" +#include + +#include "sliding_window1d.hpp" +#include "validation_utils.hpp" + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +using dpctl::tensor::usm_ndarray; +using dpctl_td_ns::typenum_t; + +namespace statistics +{ +using validation::array_names; +using validation::array_ptr; +using validation::check_num_dims; +using validation::common_checks; +using validation::name_of; + +namespace sliding_window1d +{ + +void validate(const usm_ndarray &a, + const usm_ndarray &v, + const usm_ndarray &out, + const size_t l_pad, + const size_t r_pad) +{ + std::vector inputs = {&a, &v}; + std::vector outputs = {&out}; + + array_names names; + names[&a] = "a"; + names[&v] = "v"; + names[&out] = "out"; + + common_checks(inputs, outputs, names); + + check_num_dims(&a, 1, names); + check_num_dims(&v, 1, names); + check_num_dims(&out, 1, names); + + py::ssize_t padded_a_size = l_pad + r_pad + a.get_size(); + + if (v.get_size() > padded_a_size) { + throw py::value_error(name_of(&v, names) + " size (" + + std::to_string(v.get_size()) + + ") must be less than or equal to a size (" + + std::to_string(a.get_size()) + ") + l_pad (" + + std::to_string(l_pad) + ") + r_pad (" + + std::to_string(r_pad) + ")"); + } + + auto expected_output_size = padded_a_size - v.get_size() + 1; + if (out.get_size() != expected_output_size) { + throw py::value_error( + name_of(&out, names) + " size (" + std::to_string(out.get_size()) + + ") must be equal to " + name_of(&a, names) + + " size + l_pad + r_pad " + name_of(&v, names) + " size + 1 (" + + std::to_string(expected_output_size) + ")"); + } +} + +} // namespace sliding_window1d +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/sliding_window1d.hpp b/dpnp/backend/extensions/statistics/sliding_window1d.hpp new file mode 100644 index 000000000000..c60c9a52adb9 --- /dev/null +++ b/dpnp/backend/extensions/statistics/sliding_window1d.hpp @@ -0,0 +1,660 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include "utils/math_utils.hpp" +#include +#include +#include +#include + +#include + +#include "common.hpp" + +namespace dpctl +{ +namespace tensor +{ +class usm_ndarray; +} +} // namespace dpctl + +using dpctl::tensor::usm_ndarray; + +namespace statistics +{ +using common::Align; +using common::CeilDiv; + +namespace sliding_window1d +{ + +template +class _RegistryDataStorage +{ +public: + using ncT = typename std::remove_const_t; + using SizeT = decltype(Size); + static constexpr SizeT _size = Size; + + _RegistryDataStorage(const sycl::nd_item<1> &item) + : sbgroup(item.get_sub_group()) + { + } + + template + T &operator[](const yT &idx) + { + static_assert(std::is_integral_v, + "idx must be of an integral type"); + return data[idx]; + } + + template + const T &operator[](const yT &idx) const + { + static_assert(std::is_integral_v, + "idx must be of an integral type"); + return data[idx]; + } + + T &value() + { + static_assert(Size == 1, + "Size is not equal to 1. Use value(idx) instead"); + return data[0]; + } + + const T &value() const + { + static_assert(Size == 1, + "Size is not equal to 1. 
Use value(idx) instead"); + return data[0]; + } + + template + T broadcast(const yT &y, const xT &x) const + { + static_assert(std::is_integral_v>, + "y must be of an integral type"); + static_assert(std::is_integral_v>, + "x must be of an integral type"); + + return sycl::select_from_group(sbgroup, data[y], x); + } + + template + T broadcast(const iT &idx) const + { + if constexpr (Size == 1) { + return broadcast(0, idx); + } + else { + return broadcast(idx / size_x(), idx % size_x()); + } + } + + template + T shift_left(const yT &y, const xT &x) const + { + static_assert(std::is_integral_v, "y must be of an integral type"); + static_assert(std::is_integral_v, "x must be of an integral type"); + + return sycl::shift_group_left(sbgroup, data[y], x); + } + + template + T shift_right(const yT &y, const xT &x) const + { + static_assert(std::is_integral_v, "y must be of an integral type"); + static_assert(std::is_integral_v, "x must be of an integral type"); + + return sycl::shift_group_right(sbgroup, data[y], x); + } + + constexpr SizeT size_y() const + { + return _size; + } + + SizeT size_x() const + { + return sbgroup.get_max_local_range()[0]; + } + + SizeT total_size() const + { + return size_x() * size_y(); + } + + ncT *ptr() + { + return data; + } + + SizeT x() const + { + return sbgroup.get_local_linear_id(); + } + +protected: + const sycl::sub_group sbgroup; + ncT data[Size]; +}; + +template +struct RegistryData : public _RegistryDataStorage +{ + using SizeT = typename _RegistryDataStorage::SizeT; + + using _RegistryDataStorage::_RegistryDataStorage; + + template >> + void fill_lane(const LaneIdT &lane_id, const T &value, Condition &&mask) + { + static_assert(std::is_integral_v, + "lane_id must be of an integral type"); + if (mask(this->x())) { + this->data[lane_id] = value; + } + } + + template + void fill_lane(const LaneIdT &lane_id, const T &value, const bool &mask) + { + fill_lane(lane_id, value, [mask](auto &&) { return mask; }); + } + + template + void fill_lane(const LaneIdT &lane_id, const T &value) + { + fill_lane(lane_id, value, true); + } + + template >> + void fill(const T &value, Condition &&mask) + { + for (SizeT i = 0; i < Size; ++i) { + fill_lane(i, value, mask(i, this->x())); + } + } + + void fill(const T &value) + { + fill(value, [](auto &&, auto &&) { return true; }); + } + + template >> + T *load_lane(const LaneIdT &lane_id, + const T *const data, + Condition &&mask, + const T &default_v) + { + static_assert(std::is_integral_v, + "lane_id must be of an integral type"); + this->data[lane_id] = mask(data) ? 
data[0] : default_v; + + return data + this->size_x(); + } + + template + T *load_lane(const LaneIdT &laned_id, + const T *const data, + const bool &mask, + const T &default_v) + { + return load_lane( + laned_id, data, [mask](auto &&) { return mask; }, default_v); + } + + template + T *load_lane(const LaneIdT &laned_id, const T *const data) + { + constexpr T default_v = 0; + return load_lane(laned_id, data, true, default_v); + } + + template >> + T *load(const T *const data, + const yStrideT &y_stride, + Condition &&mask, + const T &default_v) + { + auto *it = data; + for (SizeT i = 0; i < Size; ++i) { + load_lane(i, it, mask, default_v); + it += y_stride; + } + + return it; + } + + template + T *load(const T *const data, + const yStrideT &y_stride, + const bool &mask, + const T &default_v) + { + return load( + data, y_stride, [mask](auto &&) { return mask; }, default_v); + } + + template >> + T *load(const T *const data, Condition &&mask, const T &default_v) + { + return load(data, this->size_x(), mask, default_v); + } + + T *load(const T *const data, const bool &mask, const T &default_v) + { + return load( + data, [mask](auto &&) { return mask; }, default_v); + } + + T *load(const T *const data) + { + constexpr T default_v = 0; + return load(data, true, default_v); + } + + template >> + T *store_lane(const LaneIdT &lane_id, T *const data, Condition &&mask) + { + static_assert(std::is_integral_v, + "lane_id must be of an integral type"); + + if (mask(data)) { + data[0] = this->data[lane_id]; + } + + return data + this->size_x(); + } + + template + T *store_lane(const LaneIdT &lane_id, T *const data, const bool &mask) + { + return store_lane(lane_id, data, [mask](auto &&) { return mask; }); + } + + template + T *store_lane(const LaneIdT &lane_id, T *const data) + { + return store_lane(lane_id, data, true); + } + + template >> + T *store(T *const data, const yStrideT &y_stride, Condition &&condition) + { + auto *it = data; + for (SizeT i = 0; i < Size; ++i) { + store_lane(i, it, condition); + it += y_stride; + } + + return it; + } + + template + T *store(T *const data, const yStrideT &y_stride, const bool &mask) + { + return store(data, y_stride, [mask](auto &&) { return mask; }); + } + + template >> + T *store(T *const data, Condition &&condition) + { + return store(data, this->size_x(), condition); + } + + T *store(T *const data, const bool &mask) + { + return store(data, [mask](auto &&) { return mask; }); + } + + T *store(T *const data) + { + return store(data, true); + } +}; + +template +struct RegistryWindow : public RegistryData +{ + using SizeT = typename RegistryData::SizeT; + + using RegistryData::RegistryData; + + template + void advance_left(const shT &shift, const T &fill_value) + { + static_assert(std::is_integral_v, + "shift must be of an integral type"); + + uint32_t shift_r = this->size_x() - shift; + for (SizeT i = 0; i < Size; ++i) { + this->data[i] = this->shift_left(i, shift); + auto border = + i < Size - 1 ? 
this->shift_right(i + 1, shift_r) : fill_value; + if (this->x() >= shift_r) { + this->data[i] = border; + } + } + } + + void advance_left(const T &fill_value) + { + advance_left(1, fill_value); + } + + void advance_left() + { + constexpr T fill_value = 0; + advance_left(fill_value); + } +}; + +template +class Span +{ +public: + using value_type = T; + using size_type = SizeT; + + Span(T *const data, const SizeT size) : data_(data), size_(size) {} + + T *begin() const + { + return data(); + } + + T *end() const + { + return data() + size(); + } + + SizeT size() const + { + return size_; + } + + T *data() const + { + return data_; + } + +protected: + T *const data_; + const SizeT size_; +}; + +template +Span make_span(T *const data, const SizeT size) +{ + return Span(data, size); +} + +template +class PaddedSpan : public Span +{ +public: + using value_type = T; + using size_type = SizeT; + + PaddedSpan(T *const data, const SizeT size, const SizeT pad) + : Span(data, size), pad_(pad) + { + } + + T *padded_begin() const + { + return this->begin() - pad(); + } + + SizeT pad() const + { + return pad_; + } + +protected: + const SizeT pad_; +}; + +template +PaddedSpan + make_padded_span(T *const data, const SizeT size, const SizeT offset) +{ + return PaddedSpan(data, size, offset); +} + +template +void process_block(Results &results, + uint32_t r_size, + AData &a_data, + VData &v_data, + uint32_t block_size, + Op op, + Red red) +{ + for (uint32_t i = 0; i < block_size; ++i) { + auto v_val = v_data.broadcast(i); + for (uint32_t r = 0; r < r_size; ++r) { + results[r] = red(results[r], op(a_data[r], v_val)); + } + a_data.advance_left(); + } +} + +template +SizeT get_global_linear_id(const uint32_t wpi, const sycl::nd_item<1> &item) +{ + auto sbgroup = item.get_sub_group(); + const auto sg_loc_id = sbgroup.get_local_linear_id(); + + const SizeT sg_base_id = wpi * (item.get_global_linear_id() - sg_loc_id); + const SizeT id = sg_base_id + sg_loc_id; + + return id; +} + +template +uint32_t get_results_num(const uint32_t wpi, + const SizeT size, + const SizeT global_id, + const sycl::nd_item<1> &item) +{ + auto sbgroup = item.get_sub_group(); + + const auto sbg_size = sbgroup.get_max_local_range()[0]; + const auto size_ = sycl::sub_sat(size, global_id); + return std::min(SizeT(wpi), CeilDiv(size_, sbg_size)); +} + +template +class sliding_window1d_kernel; + +template +void submit_sliding_window1d(const PaddedSpan &a, + const Span &v, + const Op &op, + const Red &red, + Span &out, + sycl::nd_range<1> nd_range, + sycl::handler &cgh) +{ + cgh.parallel_for>( + nd_range, [=](sycl::nd_item<1> item) { + auto glid = get_global_linear_id(WorkPI, item); + + auto results = RegistryData(item); + results.fill(0); + + auto results_num = get_results_num(WorkPI, out.size(), glid, item); + + const auto *a_begin = a.begin(); + const auto *a_end = a.end(); + + auto sbgroup = item.get_sub_group(); + + const auto chunks_count = + CeilDiv(v.size(), sbgroup.get_max_local_range()[0]); + + const auto *a_ptr = &a.padded_begin()[glid]; + + auto _a_load_cond = [a_begin, a_end](auto &&ptr) { + return ptr >= a_begin && ptr < a_end; + }; + + auto a_data = RegistryWindow(item); + a_ptr = a_data.load(a_ptr, _a_load_cond, 0); + + const auto *v_ptr = &v.begin()[sbgroup.get_local_linear_id()]; + auto v_size = v.size(); + + for (uint32_t b = 0; b < chunks_count; ++b) { + auto v_data = RegistryData(item); + v_ptr = v_data.load(v_ptr, v_data.x() < v_size, 0); + + uint32_t chunk_size_ = + std::min(v_size, SizeT(v_data.total_size())); + 
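// process_block broadcasts each cached element of `v` across the sub-group, combines it with the register-resident window of `a` via `op`, folds the result into `results` with `red`, and slides the window left by one element per step.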
process_block(results, results_num, a_data, v_data, chunk_size_, + op, red); + + if (b != chunks_count - 1) { + a_ptr = a_data.load_lane(a_data.size_y() - 1, a_ptr, + _a_load_cond, 0); + v_size -= v_data.total_size(); + } + } + + auto *const out_ptr = out.begin(); + auto *const out_end = out.end(); + results.store(&out_ptr[glid], + [out_end](auto &&ptr) { return ptr < out_end; }); + }); +} + +template +class sliding_window1d_small_kernel; + +template +void submit_sliding_window1d_small_kernel(const PaddedSpan &a, + const Span &v, + const Op &op, + const Red &red, + Span &out, + sycl::nd_range<1> nd_range, + sycl::handler &cgh) +{ + cgh.parallel_for>( + nd_range, [=](sycl::nd_item<1> item) { + auto glid = get_global_linear_id(WorkPI, item); + + auto results = RegistryData(item); + results.fill(0); + + auto sbgroup = item.get_sub_group(); + auto sg_size = sbgroup.get_max_local_range()[0]; + + const uint32_t to_read = WorkPI * sg_size + v.size(); + const auto *a_begin = a.begin(); + + const auto *a_ptr = &a.padded_begin()[glid]; + const auto *a_end = std::min(a_ptr + to_read, a.end()); + + auto _a_load_cond = [a_begin, a_end](auto &&ptr) { + return ptr >= a_begin && ptr < a_end; + }; + + auto a_data = RegistryWindow(item); + a_data.load(a_ptr, _a_load_cond, 0); + + const auto *v_ptr = &v.begin()[sbgroup.get_local_linear_id()]; + auto v_size = v.size(); + + auto v_data = RegistryData(item); + v_ptr = v_data.load(v_ptr, v_data.x() < v_size, 0); + + auto results_num = get_results_num(WorkPI, out.size(), glid, item); + + process_block(results, results_num, a_data, v_data, v_size, op, + red); + + auto *const out_ptr = out.begin(); + auto *const out_end = out.end(); + results.store(&out_ptr[glid], + [out_end](auto &&ptr) { return ptr < out_end; }); + }); +} + +void validate(const usm_ndarray &a, + const usm_ndarray &v, + const usm_ndarray &out, + const size_t l_pad, + const size_t r_pad); +} // namespace sliding_window1d +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/statistics_py.cpp b/dpnp/backend/extensions/statistics/statistics_py.cpp index 2f3bf6a901c1..cfb3f66e08e9 100644 --- a/dpnp/backend/extensions/statistics/statistics_py.cpp +++ b/dpnp/backend/extensions/statistics/statistics_py.cpp @@ -32,9 +32,11 @@ #include "bincount.hpp" #include "histogram.hpp" +#include "sliding_dot_product1d.hpp" PYBIND11_MODULE(_statistics_impl, m) { statistics::histogram::populate_bincount(m); statistics::histogram::populate_histogram(m); + statistics::sliding_window1d::populate_sliding_dot_product1d(m); } diff --git a/dpnp/backend/extensions/statistics/validation_utils.cpp b/dpnp/backend/extensions/statistics/validation_utils.cpp new file mode 100644 index 000000000000..195d5e4df253 --- /dev/null +++ b/dpnp/backend/extensions/statistics/validation_utils.cpp @@ -0,0 +1,190 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include "validation_utils.hpp" +#include "utils/memory_overlap.hpp" + +using statistics::validation::array_names; +using statistics::validation::array_ptr; + +namespace +{ + +sycl::queue get_queue(const std::vector &inputs, + const std::vector &outputs) +{ + auto it = std::find_if(inputs.cbegin(), inputs.cend(), + [](const array_ptr &arr) { return arr != nullptr; }); + + if (it != inputs.cend()) { + return (*it)->get_queue(); + } + + it = std::find_if(outputs.cbegin(), outputs.cend(), + [](const array_ptr &arr) { return arr != nullptr; }); + + if (it != outputs.cend()) { + return (*it)->get_queue(); + } + + throw py::value_error("No input or output arrays found"); +} +} // namespace + +namespace statistics +{ +namespace validation +{ +std::string name_of(const array_ptr &arr, const array_names &names) +{ + auto name_it = names.find(arr); + assert(name_it != names.end()); + + if (name_it != names.end()) + return "'" + name_it->second + "'"; + + return "'unknown'"; +} + +void check_writable(const std::vector &arrays, + const array_names &names) +{ + for (const auto &arr : arrays) { + if (arr != nullptr && !arr->is_writable()) { + throw py::value_error(name_of(arr, names) + + " parameter is not writable"); + } + } +} + +void check_c_contig(const std::vector &arrays, + const array_names &names) +{ + for (const auto &arr : arrays) { + if (arr != nullptr && !arr->is_c_contiguous()) { + throw py::value_error(name_of(arr, names) + + " parameter is not c-contiguos"); + } + } +} + +void check_queue(const std::vector &arrays, + const array_names &names, + const sycl::queue &exec_q) +{ + auto unequal_queue = + std::find_if(arrays.cbegin(), arrays.cend(), [&](const array_ptr &arr) { + return arr != nullptr && arr->get_queue() != exec_q; + }); + + if (unequal_queue != arrays.cend()) { + throw py::value_error( + name_of(*unequal_queue, names) + + " parameter has incompatible queue with other parameters"); + } +} + +void check_no_overlap(const array_ptr &input, + const array_ptr &output, + const array_names &names) +{ + if (input == nullptr || output == nullptr) { + return; + } + + const auto &overlap = dpctl::tensor::overlap::MemoryOverlap(); + + if (overlap(*input, *output)) { + throw py::value_error(name_of(input, names) + + " has overlapping memory segments with " + + name_of(output, names)); + } +} + +void check_no_overlap(const std::vector &inputs, + const std::vector &outputs, + const array_names &names) +{ + for (const auto &input : inputs) { + for (const auto &output : outputs) { + check_no_overlap(input, output, names); + } + } +} + +void check_num_dims(const array_ptr &arr, + const size_t ndim, + const array_names &names) 
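// A null array pointer means the argument was not provided, so the check is skipped for it.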
+{ + if (arr != nullptr && arr->get_ndim() != ndim) { + throw py::value_error("Array " + name_of(arr, names) + " must be " + + std::to_string(ndim) + "D, but got " + + std::to_string(arr->get_ndim()) + "D."); + } +} + +void check_max_dims(const array_ptr &arr, + const size_t max_ndim, + const array_names &names) +{ + if (arr != nullptr && arr->get_ndim() > max_ndim) { + throw py::value_error( + "Array " + name_of(arr, names) + " must have no more than " + + std::to_string(max_ndim) + " dimensions, but got " + + std::to_string(arr->get_ndim()) + " dimensions."); + } +} + +void check_size_at_least(const array_ptr &arr, + const size_t size, + const array_names &names) +{ + if (arr != nullptr && arr->get_size() < size) { + throw py::value_error("Array " + name_of(arr, names) + + " must have at least " + std::to_string(size) + + " elements, but got " + + std::to_string(arr->get_size()) + " elements."); + } +} + +void common_checks(const std::vector &inputs, + const std::vector &outputs, + const array_names &names) +{ + check_writable(outputs, names); + + check_c_contig(inputs, names); + check_c_contig(outputs, names); + + auto exec_q = get_queue(inputs, outputs); + + check_queue(inputs, names, exec_q); + check_queue(outputs, names, exec_q); + + check_no_overlap(inputs, outputs, names); +} + +} // namespace validation +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/validation_utils.hpp b/dpnp/backend/extensions/statistics/validation_utils.hpp new file mode 100644 index 000000000000..845cbc9d9928 --- /dev/null +++ b/dpnp/backend/extensions/statistics/validation_utils.hpp @@ -0,0 +1,73 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include +#include +#include + +#include "dpctl4pybind11.hpp" + +namespace statistics +{ +namespace validation +{ +using array_ptr = const dpctl::tensor::usm_ndarray *; +using array_names = std::unordered_map; + +std::string name_of(const array_ptr &arr, const array_names &names); + +void check_writable(const std::vector &arrays, + const array_names &names); +void check_c_contig(const std::vector &arrays, + const array_names &names); +void check_queue(const std::vector &arrays, + const array_names &names, + const sycl::queue &exec_q); + +void check_no_overlap(const array_ptr &inputs, + const array_ptr &outputs, + const array_names &names); +void check_no_overlap(const std::vector &inputs, + const std::vector &outputs, + const array_names &names); + +void check_num_dims(const array_ptr &arr, + const size_t ndim, + const array_names &names); +void check_max_dims(const array_ptr &arr, + const size_t max_ndim, + const array_names &names); + +void check_size_at_least(const array_ptr &arr, + const size_t size, + const array_names &names); + +void common_checks(const std::vector &inputs, + const std::vector &outputs, + const array_names &names); +} // namespace validation +} // namespace statistics diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 68aea4eee8d7..0d44a24dde7d 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -38,13 +38,19 @@ """ import dpctl.tensor as dpt +import dpctl.utils as dpu import numpy from dpctl.tensor._numpy_helper import normalize_axis_index import dpnp # pylint: disable=no-name-in-module -from .dpnp_algo import dpnp_correlate +import dpnp.backend.extensions.statistics._statistics_impl as statistics_ext +from dpnp.dpnp_utils.dpnp_utils_common import ( + result_type_for_device, + to_supported_dtypes, +) + from .dpnp_utils import call_origin, get_usm_allocations from .dpnp_utils.dpnp_utils_reduction import dpnp_wrap_reduction_call from .dpnp_utils.dpnp_utils_statistics import dpnp_cov, dpnp_median @@ -433,47 +439,181 @@ def corrcoef(x, y=None, rowvar=True, *, dtype=None): return out -def correlate(x1, x2, mode="valid"): - """ +def _get_padding(a_size, v_size, mode): + if v_size > a_size: + a_size, v_size = v_size, a_size + + if mode == "valid": + l_pad, r_pad = 0, 0 + elif mode == "same": + l_pad = v_size // 2 + r_pad = v_size - l_pad - 1 + elif mode == "full": + l_pad, r_pad = v_size - 1, v_size - 1 + else: + raise ValueError( + f"Unknown mode: {mode}. Only 'valid', 'same', 'full' are supported." + ) + + return l_pad, r_pad + + +def _run_native_sliding_dot_product1d(a, v, l_pad, r_pad): + queue = a.sycl_queue + + usm_type = dpu.get_coerced_usm_type([a.usm_type, v.usm_type]) + out_size = l_pad + r_pad + a.size - v.size + 1 + out = dpnp.empty( + shape=out_size, sycl_queue=queue, dtype=a.dtype, usm_type=usm_type + ) + + a_usm = dpnp.get_usm_ndarray(a) + v_usm = dpnp.get_usm_ndarray(v) + out_usm = dpnp.get_usm_ndarray(out) + + _manager = dpu.SequentialOrderManager[queue] + + mem_ev, corr_ev = statistics_ext.sliding_dot_product1d( + a_usm, + v_usm, + out_usm, + l_pad, + r_pad, + depends=_manager.submitted_events, + ) + _manager.add_event_pair(mem_ev, corr_ev) + + return out + + +def correlate(a, v, mode="valid"): + r""" Cross-correlation of two 1-dimensional sequences. + This function computes the correlation as generally defined in signal + processing texts [1]: + + .. 
math:: c_k = \sum_n a_{n+k} \cdot \overline{v}_n + + with a and v sequences being zero-padded where necessary and + :math:`\overline v` denoting complex conjugation. + For full documentation refer to :obj:`numpy.correlate`. - Limitations - ----------- - Input arrays are supported as :obj:`dpnp.ndarray`. - Size and shape of input arrays are supported to be equal. - Parameter `mode` is supported only with default value ``"valid"``. - Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + First input array. + v : {dpnp.ndarray, usm_ndarray} + Second input array. + mode : {'valid', 'same', 'full'}, optional + Refer to the :obj:`dpnp.convolve` docstring. Note that the default + is ``'valid'``, unlike :obj:`dpnp.convolve`, which uses ``'full'``. + + Default: ``'valid'``. + + Notes + ----- + The definition of correlation above is not unique and sometimes + correlation may be defined differently. Another common definition is [1]: + + .. math:: c'_k = \sum_n a_{n} \cdot \overline{v_{n+k}} + + which is related to :math:`c_k` by :math:`c'_k = c_{-k}`. + + References + ---------- + .. [1] Wikipedia, "Cross-correlation", + https://en.wikipedia.org/wiki/Cross-correlation + + Returns + ------- + out : {dpnp.ndarray} + Discrete cross-correlation of `a` and `v`. See Also -------- - :obj:`dpnp.convolve` : Discrete, linear convolution of - two one-dimensional sequences. + :obj:`dpnp.convolve` : Discrete, linear convolution of two + one-dimensional sequences. + Examples -------- >>> import dpnp as np - >>> x = np.correlate([1, 2, 3], [0, 1, 0.5]) - >>> [i for i in x] - [3.5] + >>> a = np.array([1, 2, 3], dtype=np.float32) + >>> v = np.array([0, 1, 0.5], dtype=np.float32) + >>> np.correlate(a, v) + array([3.5], dtype=float32) + >>> np.correlate(a, v, "same") + array([2. , 3.5, 3. ], dtype=float32) + >>> np.correlate([1, 2, 3], [0, 1, 0.5], "full") + array([0.5, 2. , 3.5, 3. , 0. ], dtype=float32) + + Using complex sequences: + + >>> ac = np.array([1+1j, 2, 3-1j], dtype=np.complex64) + >>> vc = np.array([0, 1, 0.5j], dtype=np.complex64) + >>> np.correlate(ac, vc, 'full') + array([0.5-0.5j, 1. +0.j , 1.5-1.5j, 3. -1.j , 0. +0.j ], dtype=complex64) + + Note that you get the time reversed, complex conjugated result + (:math:`\overline{c_{-k}}`) when the two input sequences a and v change + places: + + >>> np.correlate([0, 1, 0.5j], [1+1j, 2, 3-1j], 'full') + array([0. +0.j , 3. +1.j , 1.5+1.5j, 1. +0.j , 0.5+0.5j], dtype=complex64) """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) - if x1_desc and x2_desc: - if x1_desc.size != x2_desc.size or x1_desc.size == 0: - pass - elif x1_desc.shape != x2_desc.shape: - pass - elif mode != "valid": - pass - else: - return dpnp_correlate(x1_desc, x2_desc).get_pyobj() + dpnp.check_supported_arrays_type(a, v) + + if a.size == 0 or v.size == 0: + raise ValueError( + f"Array arguments cannot be empty. " + f"Received sizes: a.size={a.size}, v.size={v.size}" + ) + if a.ndim != 1 or v.ndim != 1: + raise ValueError( + f"Only 1-dimensional arrays are supported. 
" + f"Received shapes: a.shape={a.shape}, v.shape={v.shape}" + ) + + supported_types = statistics_ext.sliding_dot_product1d_dtypes() + + device = a.sycl_device + rdtype = result_type_for_device([a.dtype, v.dtype], device) + supported_dtype = to_supported_dtypes(rdtype, supported_types, device) + + if supported_dtype is None: + raise ValueError( + f"function '{correlate}' does not support input types " + f"({a.dtype}, {v.dtype}), " + "and the inputs could not be coerced to any " + f"supported types. List of supported types: {supported_types}" + ) + + if dpnp.issubdtype(v.dtype, dpnp.complexfloating): + v = dpnp.conj(v) + + revert = False + if v.size > a.size: + revert = True + a, v = v, a + + l_pad, r_pad = _get_padding(a.size, v.size, mode) + + a_casted = dpnp.asarray(a, dtype=supported_dtype, order="C") + v_casted = dpnp.asarray(v, dtype=supported_dtype, order="C") + + if v.size > a.size: + a_casted, v_casted = v_casted, a_casted + + r = _run_native_sliding_dot_product1d(a_casted, v_casted, l_pad, r_pad) + + if revert: + r = r[::-1] - return call_origin(numpy.correlate, x1, x2, mode=mode) + return dpnp.asarray(r, dtype=rdtype, order="C") def cov( diff --git a/dpnp/dpnp_utils/dpnp_utils_common.py b/dpnp/dpnp_utils/dpnp_utils_common.py new file mode 100644 index 000000000000..54e97d03c114 --- /dev/null +++ b/dpnp/dpnp_utils/dpnp_utils_common.py @@ -0,0 +1,81 @@ +# ***************************************************************************** +# Copyright (c) 2023-2024, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + + +from collections.abc import Iterable + +from dpctl.tensor._type_utils import _can_cast + +import dpnp +from dpnp.dpnp_utils import map_dtype_to_device + +__all__ = ["result_type_for_device", "to_supported_dtypes"] + + +def result_type_for_device(dtypes, device): + """Works as dpnp.result_type, but taking into account the device capabilities.""" + + rt = dpnp.result_type(*dtypes) + return map_dtype_to_device(rt, device) + + +def to_supported_dtypes(dtypes, supported_types, device): + """ + Convert input dtypes to the supported ones based on the device capabilities. 
+ Return types from the supported_types list that are compatible with the input dtypes. + If no compatible types are found, return None. + """ + + has_fp64 = device.has_aspect_fp64 + has_fp16 = device.has_aspect_fp16 + + def is_castable(dtype, stype): + return _can_cast(dtype, stype, has_fp16, has_fp64) + + if dtypes in supported_types: + return dtypes + + for stypes in supported_types: + if not isinstance(dtypes, Iterable): + if isinstance(stypes, Iterable): + raise ValueError( + "Input and supported types must have the same length" + ) + + if is_castable(dtypes, stypes): + return stypes + else: + if not isinstance(stypes, Iterable) or len(dtypes) != len(stypes): + raise ValueError( + "Input and supported types must have the same length" + ) + + if all( + is_castable(dtype, stype) + for dtype, stype in zip(dtypes, stypes) + ): + return stypes + + return None diff --git a/dpnp/tests/test_statistics.py b/dpnp/tests/test_statistics.py index 8dbccb379d9d..96e59d9bbede 100644 --- a/dpnp/tests/test_statistics.py +++ b/dpnp/tests/test_statistics.py @@ -628,6 +628,79 @@ def test_corrcoef_scalar(self): assert_dtype_allclose(result, expected) +class TestCorrelate: + @pytest.mark.parametrize( + "a, v", [([1], [1, 2, 3]), ([1, 2, 3], [1]), ([1, 2, 3], [1, 2])] + ) + @pytest.mark.parametrize("mode", [None, "full", "valid", "same"]) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_correlate(self, a, v, mode, dtype): + an = numpy.array(a, dtype=dtype) + vn = numpy.array(v, dtype=dtype) + ad = dpnp.array(an) + vd = dpnp.array(vn) + + if mode is None: + expected = numpy.correlate(an, vn) + result = dpnp.correlate(ad, vd) + else: + expected = numpy.correlate(an, vn, mode=mode) + result = dpnp.correlate(ad, vd, mode=mode) + + assert_dtype_allclose(result, expected) + + def test_correlate_mode_error(self): + a = dpnp.arange(5) + v = dpnp.arange(3) + + # invalid mode + with pytest.raises(ValueError): + dpnp.correlate(a, v, mode="unknown") + + @pytest.mark.parametrize("a, v", [([], [1]), ([1], []), ([], [])]) + def test_correlate_empty(self, a, v): + a = dpnp.asarray(a) + v = dpnp.asarray(v) + + with pytest.raises(ValueError): + dpnp.correlate(a, v) + + @pytest.mark.parametrize( + "a, v", + [ + ([[1, 2], [2, 3]], [1]), + ([1], [[1, 2], [2, 3]]), + ([[1, 2], [2, 3]], [[1, 2], [2, 3]]), + ], + ) + def test_correlate_shape_error(self, a, v): + a = dpnp.asarray(a) + v = dpnp.asarray(v) + + with pytest.raises(ValueError): + dpnp.correlate(a, v) + + @pytest.mark.parametrize("size", [2, 10**1, 10**2, 10**3, 10**4, 10**5]) + def test_correlate_different_sizes(self, size): + a = numpy.random.rand(size).astype(numpy.float32) + v = numpy.random.rand(size // 2).astype(numpy.float32) + + ad = dpnp.array(a) + vd = dpnp.array(v) + + expected = numpy.correlate(a, v) + result = dpnp.correlate(ad, vd) + + assert_dtype_allclose(result, expected, factor=20) + + def test_correlate_another_sycl_queue(self): + a = dpnp.arange(5, sycl_queue=dpctl.SyclQueue()) + v = dpnp.arange(3, sycl_queue=dpctl.SyclQueue()) + + with pytest.raises(ValueError): + dpnp.correlate(a, v) + + @pytest.mark.parametrize( "dtype", get_all_dtypes(no_bool=True, no_none=True, no_complex=True) ) diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index 0041fbff4752..ac96e5d04af9 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -726,6 +726,7 @@ def test_reduce_hypot(device): [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], [[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]], ), + pytest.param("correlate", [1, 
2, 3], [4, 5, 6]), pytest.param("cross", [1.0, 2.0, 3.0], [4.0, 5.0, 6.0]), pytest.param("digitize", [0.2, 6.4, 3.0], [0.0, 1.0, 2.5, 4.0]), pytest.param( diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py index 29c9419fe84d..9769a4d97ef9 100644 --- a/dpnp/tests/test_usm_type.py +++ b/dpnp/tests/test_usm_type.py @@ -696,6 +696,7 @@ def test_1in_1out(func, data, usm_type): [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], [[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]], ), + pytest.param("correlate", [1, 2, 3], [0, 1, 0.5]), # dpnp.dot has 3 different implementations based on input arrays dtype # checking all of them pytest.param("dot", [3.0, 4.0, 5.0], [1.0, 2.0, 3.0]), diff --git a/dpnp/tests/third_party/cupy/statistics_tests/test_correlation.py b/dpnp/tests/third_party/cupy/statistics_tests/test_correlation.py index f8d7feb75c73..e1ee53748f88 100644 --- a/dpnp/tests/third_party/cupy/statistics_tests/test_correlation.py +++ b/dpnp/tests/third_party/cupy/statistics_tests/test_correlation.py @@ -164,7 +164,6 @@ def test_cov_empty(self): } ) ) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestCorrelateShapeCombination(unittest.TestCase): @testing.for_all_dtypes(no_float16=True) @@ -176,7 +175,6 @@ def test_correlate(self, xp, dtype): @pytest.mark.parametrize("mode", ["valid", "full", "same"]) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestCorrelate: @testing.for_all_dtypes() diff --git a/tests_external/skipped_tests_numpy.tbl b/tests_external/skipped_tests_numpy.tbl index a1ca87fb4bdb..8553bb1faa3c 100644 --- a/tests_external/skipped_tests_numpy.tbl +++ b/tests_external/skipped_tests_numpy.tbl @@ -1022,7 +1022,6 @@ tests/test_numeric.py::TestClip::test_type_cast_11 tests/test_numeric.py::TestClip::test_type_cast_12 tests/test_numeric.py::TestConvolve::test_no_overwrite tests/test_numeric.py::TestConvolve::test_object -tests/test_numeric.py::TestCorrelate::test_object tests/test_numeric.py::TestCross::test_2x2 tests/test_numeric.py::TestCross::test_broadcasting tests/test_numeric.py::TestCross::test_broadcasting_shapes From 9130a5350396b68fc40d4e0f07b56a3fd6cccb3b Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Thu, 28 Nov 2024 04:42:30 +0100 Subject: [PATCH 2/5] Fix signed/unsigned compare warnings & small fixes --- .../extensions/statistics/validation_utils.cpp | 17 ++++++++++------- dpnp/dpnp_iface_statistics.py | 3 --- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dpnp/backend/extensions/statistics/validation_utils.cpp b/dpnp/backend/extensions/statistics/validation_utils.cpp index 195d5e4df253..d5b2ae381ea1 100644 --- a/dpnp/backend/extensions/statistics/validation_utils.cpp +++ b/dpnp/backend/extensions/statistics/validation_utils.cpp @@ -138,10 +138,11 @@ void check_num_dims(const array_ptr &arr, const size_t ndim, const array_names &names) { - if (arr != nullptr && arr->get_ndim() != ndim) { + size_t arr_n_dim = arr != nullptr ? arr->get_ndim() : 0; + if (arr != nullptr && arr_n_dim != ndim) { throw py::value_error("Array " + name_of(arr, names) + " must be " + std::to_string(ndim) + "D, but got " + - std::to_string(arr->get_ndim()) + "D."); + std::to_string(arr_n_dim) + "D."); } } @@ -149,11 +150,12 @@ void check_max_dims(const array_ptr &arr, const size_t max_ndim, const array_names &names) { - if (arr != nullptr && arr->get_ndim() > max_ndim) { + size_t arr_n_dim = arr != nullptr ? 
arr->get_ndim() : 0; + if (arr != nullptr && arr_n_dim > max_ndim) { throw py::value_error( "Array " + name_of(arr, names) + " must have no more than " + std::to_string(max_ndim) + " dimensions, but got " + - std::to_string(arr->get_ndim()) + " dimensions."); + std::to_string(arr_n_dim) + " dimensions."); } } @@ -161,11 +163,12 @@ void check_size_at_least(const array_ptr &arr, const size_t size, const array_names &names) { - if (arr != nullptr && arr->get_size() < size) { + size_t arr_size = arr != nullptr ? arr->get_size() : 0; + if (arr != nullptr && arr_size < size) { throw py::value_error("Array " + name_of(arr, names) + " must have at least " + std::to_string(size) + - " elements, but got " + - std::to_string(arr->get_size()) + " elements."); + " elements, but got " + std::to_string(arr_size) + + " elements."); } } diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 0d44a24dde7d..3cd04561473e 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -605,9 +605,6 @@ def correlate(a, v, mode="valid"): a_casted = dpnp.asarray(a, dtype=supported_dtype, order="C") v_casted = dpnp.asarray(v, dtype=supported_dtype, order="C") - if v.size > a.size: - a_casted, v_casted = v_casted, a_casted - r = _run_native_sliding_dot_product1d(a_casted, v_casted, l_pad, r_pad) if revert: From 943726f4c86ac571fa1112b87a3b017d1aa3638a Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Sat, 7 Dec 2024 17:11:56 +0100 Subject: [PATCH 3/5] Apply review comments --- .../extensions/statistics/bincount.hpp | 7 +--- dpnp/backend/extensions/statistics/common.cpp | 8 +--- dpnp/backend/extensions/statistics/common.hpp | 7 +--- .../extensions/statistics/dispatch_table.hpp | 8 +--- .../extensions/statistics/histogram.cpp | 4 +- .../extensions/statistics/histogram.hpp | 9 ++--- .../statistics/histogram_common.cpp | 3 -- .../statistics/histogram_common.hpp | 8 ---- .../statistics/sliding_dot_product1d.cpp | 10 ++--- .../statistics/sliding_dot_product1d.hpp | 10 +---- .../statistics/sliding_window1d.cpp | 3 -- .../statistics/sliding_window1d.hpp | 10 ----- .../statistics/validation_utils.cpp | 7 +--- .../statistics/validation_utils.hpp | 7 +--- dpnp/dpnp_iface_statistics.py | 37 +++++++++---------- 15 files changed, 40 insertions(+), 98 deletions(-) diff --git a/dpnp/backend/extensions/statistics/bincount.hpp b/dpnp/backend/extensions/statistics/bincount.hpp index 70a17431383f..f904bb02cbe0 100644 --- a/dpnp/backend/extensions/statistics/bincount.hpp +++ b/dpnp/backend/extensions/statistics/bincount.hpp @@ -32,9 +32,7 @@ namespace dpctl_td_ns = dpctl::tensor::type_dispatch; -namespace statistics -{ -namespace histogram +namespace statistics::histogram { struct Bincount { @@ -62,5 +60,4 @@ struct Bincount }; void populate_bincount(py::module_ m); -} // namespace histogram -} // namespace statistics +} // namespace statistics::histogram diff --git a/dpnp/backend/extensions/statistics/common.cpp b/dpnp/backend/extensions/statistics/common.cpp index 961205293725..7238c8ebbfe6 100644 --- a/dpnp/backend/extensions/statistics/common.cpp +++ b/dpnp/backend/extensions/statistics/common.cpp @@ -29,11 +29,8 @@ namespace dpctl_td_ns = dpctl::tensor::type_dispatch; -namespace statistics +namespace statistics::common { -namespace common -{ - size_t get_max_local_size(const sycl::device &device) { constexpr const int default_max_cpu_local_size = 256; @@ -120,5 +117,4 @@ pybind11::dtype dtype_from_typenum(int dst_typenum) } } -} // namespace common -} // namespace statistics +} 
// namespace statistics::common diff --git a/dpnp/backend/extensions/statistics/common.hpp b/dpnp/backend/extensions/statistics/common.hpp index 99a31002ea13..26e49bce8e7c 100644 --- a/dpnp/backend/extensions/statistics/common.hpp +++ b/dpnp/backend/extensions/statistics/common.hpp @@ -36,9 +36,7 @@ #include "utils/math_utils.hpp" // clang-format on -namespace statistics -{ -namespace common +namespace statistics::common { template @@ -200,5 +198,4 @@ sycl::nd_range<1> // headers of dpctl. pybind11::dtype dtype_from_typenum(int dst_typenum); -} // namespace common -} // namespace statistics +} // namespace statistics::common diff --git a/dpnp/backend/extensions/statistics/dispatch_table.hpp b/dpnp/backend/extensions/statistics/dispatch_table.hpp index 42c170c53617..d0219ecd50e9 100644 --- a/dpnp/backend/extensions/statistics/dispatch_table.hpp +++ b/dpnp/backend/extensions/statistics/dispatch_table.hpp @@ -39,11 +39,8 @@ namespace dpctl_td_ns = dpctl::tensor::type_dispatch; namespace py = pybind11; -namespace statistics +namespace statistics::common { -namespace common -{ - template struct one_of { @@ -386,5 +383,4 @@ class DispatchTable2 Table2 table; }; -} // namespace common -} // namespace statistics +} // namespace statistics::common diff --git a/dpnp/backend/extensions/statistics/histogram.cpp b/dpnp/backend/extensions/statistics/histogram.cpp index 848f89451236..cd337718a272 100644 --- a/dpnp/backend/extensions/statistics/histogram.cpp +++ b/dpnp/backend/extensions/statistics/histogram.cpp @@ -26,9 +26,7 @@ #include #include #include -#include -#include -#include +#include #include #include diff --git a/dpnp/backend/extensions/statistics/histogram.hpp b/dpnp/backend/extensions/statistics/histogram.hpp index e89737c12946..7df877428aa1 100644 --- a/dpnp/backend/extensions/statistics/histogram.hpp +++ b/dpnp/backend/extensions/statistics/histogram.hpp @@ -29,11 +29,9 @@ #include "dispatch_table.hpp" -namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +// namespace dpctl_td_ns = dpctl::tensor::type_dispatch; -namespace statistics -{ -namespace histogram +namespace statistics::histogram { struct Histogram { @@ -59,5 +57,4 @@ struct Histogram }; void populate_histogram(py::module_ m); -} // namespace histogram -} // namespace statistics +} // namespace statistics::histogram diff --git a/dpnp/backend/extensions/statistics/histogram_common.cpp b/dpnp/backend/extensions/statistics/histogram_common.cpp index af47fe633674..65eba05be592 100644 --- a/dpnp/backend/extensions/statistics/histogram_common.cpp +++ b/dpnp/backend/extensions/statistics/histogram_common.cpp @@ -26,12 +26,9 @@ #include #include #include -#include #include #include "dpctl4pybind11.hpp" -#include "utils/memory_overlap.hpp" -#include "utils/output_validation.hpp" #include "utils/type_dispatch.hpp" #include diff --git a/dpnp/backend/extensions/statistics/histogram_common.hpp b/dpnp/backend/extensions/statistics/histogram_common.hpp index e7503fe98778..9a89ae340a33 100644 --- a/dpnp/backend/extensions/statistics/histogram_common.hpp +++ b/dpnp/backend/extensions/statistics/histogram_common.hpp @@ -29,14 +29,6 @@ #include "common.hpp" -namespace dpctl -{ -namespace tensor -{ -class usm_ndarray; -} -} // namespace dpctl - using dpctl::tensor::usm_ndarray; namespace statistics diff --git a/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp b/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp index ddc501bc13be..de3dafa2087d 100644 --- a/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp +++ 
b/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp @@ -23,12 +23,8 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** -#include #include #include -#include -#include -#include #include #include @@ -42,7 +38,7 @@ #include "sliding_dot_product1d.hpp" #include "sliding_window1d.hpp" -#include +// #include namespace dpctl_td_ns = dpctl::tensor::type_dispatch; using dpctl::tensor::usm_ndarray; @@ -101,7 +97,9 @@ struct SlidingDotProductF } }; -using SupportedTypes = std::tuple #include -namespace dpctl_td_ns = dpctl::tensor::type_dispatch; - -namespace statistics -{ -namespace sliding_window1d +namespace statistics::sliding_window1d { struct SlidingDotProduct1d { @@ -62,5 +57,4 @@ struct SlidingDotProduct1d }; void populate_sliding_dot_product1d(py::module_ m); -} // namespace sliding_window1d -} // namespace statistics +} // namespace statistics::sliding_window1d diff --git a/dpnp/backend/extensions/statistics/sliding_window1d.cpp b/dpnp/backend/extensions/statistics/sliding_window1d.cpp index bf7ec2424c8e..412f272353a8 100644 --- a/dpnp/backend/extensions/statistics/sliding_window1d.cpp +++ b/dpnp/backend/extensions/statistics/sliding_window1d.cpp @@ -23,10 +23,7 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** -#include -#include #include -#include #include #include "dpctl4pybind11.hpp" diff --git a/dpnp/backend/extensions/statistics/sliding_window1d.hpp b/dpnp/backend/extensions/statistics/sliding_window1d.hpp index c60c9a52adb9..074a780c1029 100644 --- a/dpnp/backend/extensions/statistics/sliding_window1d.hpp +++ b/dpnp/backend/extensions/statistics/sliding_window1d.hpp @@ -26,23 +26,13 @@ #pragma once #include "utils/math_utils.hpp" -#include #include -#include #include #include #include "common.hpp" -namespace dpctl -{ -namespace tensor -{ -class usm_ndarray; -} -} // namespace dpctl - using dpctl::tensor::usm_ndarray; namespace statistics diff --git a/dpnp/backend/extensions/statistics/validation_utils.cpp b/dpnp/backend/extensions/statistics/validation_utils.cpp index d5b2ae381ea1..1a3415543b51 100644 --- a/dpnp/backend/extensions/statistics/validation_utils.cpp +++ b/dpnp/backend/extensions/statistics/validation_utils.cpp @@ -53,9 +53,7 @@ sycl::queue get_queue(const std::vector &inputs, } } // namespace -namespace statistics -{ -namespace validation +namespace statistics::validation { std::string name_of(const array_ptr &arr, const array_names &names) { @@ -189,5 +187,4 @@ void common_checks(const std::vector &inputs, check_no_overlap(inputs, outputs, names); } -} // namespace validation -} // namespace statistics +} // namespace statistics::validation diff --git a/dpnp/backend/extensions/statistics/validation_utils.hpp b/dpnp/backend/extensions/statistics/validation_utils.hpp index 845cbc9d9928..a1e0ff521ce6 100644 --- a/dpnp/backend/extensions/statistics/validation_utils.hpp +++ b/dpnp/backend/extensions/statistics/validation_utils.hpp @@ -31,9 +31,7 @@ #include "dpctl4pybind11.hpp" -namespace statistics -{ -namespace validation +namespace statistics::validation { using array_ptr = const dpctl::tensor::usm_ndarray *; using array_names = std::unordered_map; @@ -69,5 +67,4 @@ void check_size_at_least(const array_ptr &arr, void common_checks(const std::vector &inputs, const std::vector &outputs, const array_names &names); -} // namespace validation -} // namespace statistics +} // namespace statistics::validation diff --git 
a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 3cd04561473e..b82a74e2f95f 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -440,8 +440,7 @@ def corrcoef(x, y=None, rowvar=True, *, dtype=None): def _get_padding(a_size, v_size, mode): - if v_size > a_size: - a_size, v_size = v_size, a_size + assert v_size <= a_size if mode == "valid": l_pad, r_pad = 0, 0 @@ -463,9 +462,8 @@ def _run_native_sliding_dot_product1d(a, v, l_pad, r_pad): usm_type = dpu.get_coerced_usm_type([a.usm_type, v.usm_type]) out_size = l_pad + r_pad + a.size - v.size + 1 - out = dpnp.empty( - shape=out_size, sycl_queue=queue, dtype=a.dtype, usm_type=usm_type - ) + # out type is the same as input type + out = dpnp.empty_like(a, shape=out_size, usm_type=usm_type) a_usm = dpnp.get_usm_ndarray(a) v_usm = dpnp.get_usm_ndarray(v) @@ -491,11 +489,11 @@ def correlate(a, v, mode="valid"): Cross-correlation of two 1-dimensional sequences. This function computes the correlation as generally defined in signal - processing texts [1]: + processing texts [1]_: .. math:: c_k = \sum_n a_{n+k} \cdot \overline{v}_n - with a and v sequences being zero-padded where necessary and + with `a` and `v` sequences being zero-padded where necessary and :math:`\overline v` denoting complex conjugation. For full documentation refer to :obj:`numpy.correlate`. @@ -506,16 +504,16 @@ def correlate(a, v, mode="valid"): First input array. v : {dpnp.ndarray, usm_ndarray} Second input array. - mode : {'valid', 'same', 'full'}, optional + mode : {"valid", "same", "full"}, optional Refer to the :obj:`dpnp.convolve` docstring. Note that the default - is ``'valid'``, unlike :obj:`dpnp.convolve`, which uses ``'full'``. + is ``"valid"``, unlike :obj:`dpnp.convolve`, which uses ``"full"``. - Default: ``'valid'``. + Default: ``"valid"``. Notes ----- The definition of correlation above is not unique and sometimes - correlation may be defined differently. Another common definition is [1]: + correlation may be defined differently. Another common definition is [1]_: .. math:: c'_k = \sum_n a_{n} \cdot \overline{v_{n+k}} @@ -533,8 +531,8 @@ def correlate(a, v, mode="valid"): See Also -------- - :obj:`dpnp.convolve` : Discrete, linear convolution of two - one-dimensional sequences. + :obj:`dpnp.convolve` : Discrete, linear convolution of two one-dimensional + sequences. Examples @@ -546,7 +544,7 @@ def correlate(a, v, mode="valid"): array([3.5], dtype=float32) >>> np.correlate(a, v, "same") array([2. , 3.5, 3. ], dtype=float32) - >>> np.correlate([1, 2, 3], [0, 1, 0.5], "full") + >>> np.correlate([a, v, "full") array([0.5, 2. , 3.5, 3. , 0. ], dtype=float32) Using complex sequences: @@ -557,10 +555,10 @@ def correlate(a, v, mode="valid"): array([0.5-0.5j, 1. +0.j , 1.5-1.5j, 3. -1.j , 0. +0.j ], dtype=complex64) Note that you get the time reversed, complex conjugated result - (:math:`\overline{c_{-k}}`) when the two input sequences a and v change + (:math:`\overline{c_{-k}}`) when the two input sequences `a` and `v` change places: - >>> np.correlate([0, 1, 0.5j], [1+1j, 2, 3-1j], 'full') + >>> np.correlate(vc, ac, 'full') array([0. +0.j , 3. +1.j , 1.5+1.5j, 1. 
+0.j , 0.5+0.5j], dtype=complex64) """ @@ -586,10 +584,11 @@ def correlate(a, v, mode="valid"): if supported_dtype is None: raise ValueError( - f"function '{correlate}' does not support input types " - f"({a.dtype}, {v.dtype}), " + f"function does not support input types " + f"({a.dtype.name}, {v.dtype.name}), " "and the inputs could not be coerced to any " - f"supported types. List of supported types: {supported_types}" + f"supported types. List of supported types: " + f"{[st.name for st in supported_types]}" ) if dpnp.issubdtype(v.dtype, dpnp.complexfloating): From a4e627df3d5d8aa4aaf8da5241d295914fdcdb76 Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Mon, 9 Dec 2024 18:07:28 +0100 Subject: [PATCH 4/5] Apply review comments 2 --- dpnp/dpnp_iface_statistics.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index b82a74e2f95f..76d6c16defd1 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -510,6 +510,11 @@ def correlate(a, v, mode="valid"): Default: ``"valid"``. + Returns + ------- + out : dpnp.ndarray + Discrete cross-correlation of `a` and `v`. + Notes ----- The definition of correlation above is not unique and sometimes @@ -524,11 +529,6 @@ def correlate(a, v, mode="valid"): .. [1] Wikipedia, "Cross-correlation", https://en.wikipedia.org/wiki/Cross-correlation - Returns - ------- - out : {dpnp.ndarray} - Discrete cross-correlation of `a` and `v`. - See Also -------- :obj:`dpnp.convolve` : Discrete, linear convolution of two one-dimensional From bb795fbb561cd441e92cdcf8ad6a2e2f917bcbe2 Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Mon, 9 Dec 2024 18:11:40 +0100 Subject: [PATCH 5/5] Apply review comments 3 --- dpnp/backend/extensions/statistics/histogram.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/dpnp/backend/extensions/statistics/histogram.hpp b/dpnp/backend/extensions/statistics/histogram.hpp index a1eeb191884d..c7cf5aeb1b80 100644 --- a/dpnp/backend/extensions/statistics/histogram.hpp +++ b/dpnp/backend/extensions/statistics/histogram.hpp @@ -31,8 +31,6 @@ #include "dispatch_table.hpp" #include "dpctl4pybind11.hpp" -// namespace dpctl_td_ns = dpctl::tensor::type_dispatch; - namespace statistics::histogram { struct Histogram
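
Illustrative usage of the new `dpnp.correlate` across the three supported modes. This is a sketch assembled from the docstring examples introduced by this patch series; the inline output-length comments reflect standard NumPy correlate mode semantics, which the patch mirrors, and are not additional behavior defined here.

>>> import dpnp as np
>>> a = np.array([1, 2, 3], dtype=np.float32)
>>> v = np.array([0, 1, 0.5], dtype=np.float32)
>>> np.correlate(a, v)                # "valid": output size a.size - v.size + 1 == 1
array([3.5], dtype=float32)
>>> np.correlate(a, v, mode="same")   # "same": output size max(a.size, v.size) == 3
array([2. , 3.5, 3. ], dtype=float32)
>>> np.correlate(a, v, mode="full")   # "full": output size a.size + v.size - 1 == 5
array([0.5, 2. , 3.5, 3. , 0. ], dtype=float32)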