diff --git a/.github/workflows/os-llvm-sycl-build.yml b/.github/workflows/os-llvm-sycl-build.yml index 19180b6e2a..b8870bddef 100644 --- a/.github/workflows/os-llvm-sycl-build.yml +++ b/.github/workflows/os-llvm-sycl-build.yml @@ -13,11 +13,11 @@ jobs: env: DOWNLOAD_URL_PREFIX: https://github.com/intel/llvm/releases/download - DRIVER_PATH: 2023-WW27 - OCLCPUEXP_FN: oclcpuexp-2023.16.6.0.28_rel.tar.gz - TBB_URL: https://github.com/oneapi-src/oneTBB/releases/download/v2021.9.0/ - TBB_INSTALL_DIR: oneapi-tbb-2021.9.0 - TBB_FN: oneapi-tbb-2021.9.0-lin.tgz + DRIVER_PATH: 2024-WW25 + OCLCPUEXP_FN: oclcpuexp-2024.18.6.0.02_rel.tar.gz + TBB_URL: https://github.com/oneapi-src/oneTBB/releases/download/v2021.12.0/ + TBB_INSTALL_DIR: oneapi-tbb-2021.12.0 + TBB_FN: oneapi-tbb-2021.12.0-lin.tgz steps: - name: Cancel Previous Runs @@ -159,6 +159,4 @@ jobs: SYCL_CACHE_PERSISTENT: 1 run: | source set_allvars.sh - # Skip the test that checks if there is only one hard - # copy of DPCTLSyclInterface library - python -m pytest -v dpctl/tests --no-sycl-interface-test + python -m pytest -sv dpctl/tests diff --git a/dpctl/tensor/libtensor/include/utils/offset_utils.hpp b/dpctl/tensor/libtensor/include/utils/offset_utils.hpp index 1ad89c4fac..bbd384125d 100644 --- a/dpctl/tensor/libtensor/include/utils/offset_utils.hpp +++ b/dpctl/tensor/libtensor/include/utils/offset_utils.hpp @@ -27,6 +27,7 @@ #pragma once #include +#include #include #include #include @@ -81,6 +82,30 @@ std::vector concat(std::vector lhs, Vs &&...vs) } // namespace detail +template +class usm_host_allocator : public sycl::usm_allocator +{ +public: + using baseT = sycl::usm_allocator; + using baseT::baseT; + + template struct rebind + { + typedef usm_host_allocator other; + }; + + void deallocate(T *ptr, size_t n) + { + try { + baseT::deallocate(ptr, n); + } catch (const std::exception &e) { + std::cerr + << "Exception caught in `usm_host_allocator::deallocate`: " + << e.what() << std::endl; + } + } +}; + template std::tuple device_allocate_and_pack(sycl::queue &q, @@ -90,8 +115,7 @@ device_allocate_and_pack(sycl::queue &q, // memory transfer optimization, use USM-host for temporary speeds up // transfer to device, especially on dGPUs - using usm_host_allocatorT = - sycl::usm_allocator; + using usm_host_allocatorT = usm_host_allocator; using shT = std::vector; usm_host_allocatorT usm_host_allocator(q); diff --git a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp index 56db97eab7..77ec075ccf 100644 --- a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp +++ b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp @@ -35,6 +35,7 @@ #include "dpctl4pybind11.hpp" #include "kernels/integer_advanced_indexing.hpp" #include "utils/memory_overlap.hpp" +#include "utils/offset_utils.hpp" #include "utils/output_validation.hpp" #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" @@ -91,7 +92,7 @@ _populate_kernel_params(sycl::queue &exec_q, { using usm_host_allocator_T = - sycl::usm_allocator; + dpctl::tensor::offset_utils::usm_host_allocator; using ptrT = std::vector; usm_host_allocator_T ptr_allocator(exec_q); @@ -99,7 +100,7 @@ _populate_kernel_params(sycl::queue &exec_q, std::make_shared(k, ptr_allocator); using usm_host_allocatorT = - sycl::usm_allocator; + dpctl::tensor::offset_utils::usm_host_allocator; using shT = std::vector; usm_host_allocatorT sz_allocator(exec_q); diff --git a/dpctl/tensor/libtensor/source/triul_ctor.cpp b/dpctl/tensor/libtensor/source/triul_ctor.cpp index 5c6f21c6f7..4f70e27e10 100644 --- a/dpctl/tensor/libtensor/source/triul_ctor.cpp +++ b/dpctl/tensor/libtensor/source/triul_ctor.cpp @@ -32,6 +32,7 @@ #include "kernels/constructors.hpp" #include "simplify_iteration_space.hpp" #include "utils/memory_overlap.hpp" +#include "utils/offset_utils.hpp" #include "utils/output_validation.hpp" #include "utils/type_dispatch.hpp" @@ -150,7 +151,7 @@ usm_ndarray_triul(sycl::queue &exec_q, nd += 2; using usm_host_allocatorT = - sycl::usm_allocator; + dpctl::tensor::offset_utils::usm_host_allocator; using usmshT = std::vector; usm_host_allocatorT allocator(exec_q); diff --git a/dpctl/tests/test_usm_ndarray_print.py b/dpctl/tests/test_usm_ndarray_print.py index 9e15fa3310..983cb75d98 100644 --- a/dpctl/tests/test_usm_ndarray_print.py +++ b/dpctl/tests/test_usm_ndarray_print.py @@ -283,7 +283,6 @@ def test_print_repr(self): x = dpt.arange(4, dtype="i4", sycl_queue=q) x.sycl_queue.wait() r = repr(x) - print(r) assert r == "usm_ndarray([0, 1, 2, 3], dtype=int32)" dpt.set_print_options(linewidth=1)