Skip to content

Commit 02ac7fa

Browse files
Write usm_host_allocator that wraps call to free in try/catch
Wrote dpctl::tensor::offset_utils::usm_host_allocator<T> to allocate USM-host memory as storage for std::vector, and replaced uses of sycl::usm_allocator<T, sycl::usm::alloc::host> with it. The new class derives from sycl::usm_allocator<T, sycl::usm::alloc::host>, but overrides the deallocate method to wrap the call to baseT::deallocate in try/catch. The exception, if caught, is printed but otherwise ignored, consistent with how this is done in the USMDeleter class used in dpctl.memory. This works around sporadic crashes caused by an unhandled exception thrown by the OpenCL CPU driver, which appears to be benign. The issue was reported to the CPU driver team, with a native reproducer (compiler LLVM jira ticket 58387).
1 parent 8257ca9 commit 02ac7fa

File tree

3 files changed

+31
-5
lines changed

3 files changed

+31
-5
lines changed

dpctl/tensor/libtensor/include/utils/offset_utils.hpp

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#pragma once
2828

2929
#include <algorithm>
30+
#include <exception>
3031
#include <sycl/sycl.hpp>
3132
#include <tuple>
3233
#include <vector>
@@ -81,6 +82,30 @@ std::vector<T, A> concat(std::vector<T, A> lhs, Vs &&...vs)
8182

8283
} // namespace detail
8384

85+
template <typename T>
86+
class usm_host_allocator : public sycl::usm_allocator<T, sycl::usm::alloc::host>
87+
{
88+
public:
89+
using baseT = sycl::usm_allocator<T, sycl::usm::alloc::host>;
90+
using baseT::baseT;
91+
92+
template <typename U> struct rebind
93+
{
94+
typedef usm_host_allocator<U> other;
95+
};
96+
97+
void deallocate(T *ptr, size_t n)
98+
{
99+
try {
100+
baseT::deallocate(ptr, n);
101+
} catch (const std::exception &e) {
102+
std::cerr
103+
<< "Exception caught in `usm_host_allocator::deallocate`: "
104+
<< e.what() << std::endl;
105+
}
106+
}
107+
};
108+
84109
template <typename indT, typename... Vs>
85110
std::tuple<indT *, size_t, sycl::event>
86111
device_allocate_and_pack(sycl::queue &q,
@@ -90,8 +115,7 @@ device_allocate_and_pack(sycl::queue &q,
90115

91116
// memory transfer optimization, use USM-host for temporary speeds up
92117
// transfer to device, especially on dGPUs
93-
using usm_host_allocatorT =
94-
sycl::usm_allocator<indT, sycl::usm::alloc::host>;
118+
using usm_host_allocatorT = usm_host_allocator<indT>;
95119
using shT = std::vector<indT, usm_host_allocatorT>;
96120

97121
usm_host_allocatorT usm_host_allocator(q);

dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "dpctl4pybind11.hpp"
3636
#include "kernels/integer_advanced_indexing.hpp"
3737
#include "utils/memory_overlap.hpp"
38+
#include "utils/offset_utils.hpp"
3839
#include "utils/output_validation.hpp"
3940
#include "utils/type_dispatch.hpp"
4041
#include "utils/type_utils.hpp"
@@ -91,15 +92,15 @@ _populate_kernel_params(sycl::queue &exec_q,
9192
{
9293

9394
using usm_host_allocator_T =
94-
sycl::usm_allocator<char *, sycl::usm::alloc::host>;
95+
dpctl::tensor::offset_utils::usm_host_allocator<char *>;
9596
using ptrT = std::vector<char *, usm_host_allocator_T>;
9697

9798
usm_host_allocator_T ptr_allocator(exec_q);
9899
std::shared_ptr<ptrT> host_ind_ptrs_shp =
99100
std::make_shared<ptrT>(k, ptr_allocator);
100101

101102
using usm_host_allocatorT =
102-
sycl::usm_allocator<py::ssize_t, sycl::usm::alloc::host>;
103+
dpctl::tensor::offset_utils::usm_host_allocator<py::ssize_t>;
103104
using shT = std::vector<py::ssize_t, usm_host_allocatorT>;
104105

105106
usm_host_allocatorT sz_allocator(exec_q);

dpctl/tensor/libtensor/source/triul_ctor.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "kernels/constructors.hpp"
3333
#include "simplify_iteration_space.hpp"
3434
#include "utils/memory_overlap.hpp"
35+
#include "utils/offset_utils.hpp"
3536
#include "utils/output_validation.hpp"
3637
#include "utils/type_dispatch.hpp"
3738

@@ -150,7 +151,7 @@ usm_ndarray_triul(sycl::queue &exec_q,
150151
nd += 2;
151152

152153
using usm_host_allocatorT =
153-
sycl::usm_allocator<py::ssize_t, sycl::usm::alloc::host>;
154+
dpctl::tensor::offset_utils::usm_host_allocator<py::ssize_t>;
154155
using usmshT = std::vector<py::ssize_t, usm_host_allocatorT>;
155156

156157
usm_host_allocatorT allocator(exec_q);

0 commit comments

Comments
 (0)