diff --git a/.github/workflows/os-llvm-sycl-build.yml b/.github/workflows/os-llvm-sycl-build.yml
index 19180b6e2a..b8870bddef 100644
--- a/.github/workflows/os-llvm-sycl-build.yml
+++ b/.github/workflows/os-llvm-sycl-build.yml
@@ -13,11 +13,11 @@ jobs:
 
     env:
       DOWNLOAD_URL_PREFIX: https://github.com/intel/llvm/releases/download
-      DRIVER_PATH: 2023-WW27
-      OCLCPUEXP_FN: oclcpuexp-2023.16.6.0.28_rel.tar.gz
-      TBB_URL: https://github.com/oneapi-src/oneTBB/releases/download/v2021.9.0/
-      TBB_INSTALL_DIR: oneapi-tbb-2021.9.0
-      TBB_FN: oneapi-tbb-2021.9.0-lin.tgz
+      DRIVER_PATH: 2024-WW25
+      OCLCPUEXP_FN: oclcpuexp-2024.18.6.0.02_rel.tar.gz
+      TBB_URL: https://github.com/oneapi-src/oneTBB/releases/download/v2021.12.0/
+      TBB_INSTALL_DIR: oneapi-tbb-2021.12.0
+      TBB_FN: oneapi-tbb-2021.12.0-lin.tgz
 
     steps:
       - name: Cancel Previous Runs
@@ -159,6 +159,4 @@ jobs:
           SYCL_CACHE_PERSISTENT: 1
         run: |
           source set_allvars.sh
-          # Skip the test that checks if there is only one hard
-          # copy of DPCTLSyclInterface library
-          python -m pytest -v dpctl/tests --no-sycl-interface-test
+          python -m pytest -sv dpctl/tests
diff --git a/dpctl/tensor/libtensor/include/utils/offset_utils.hpp b/dpctl/tensor/libtensor/include/utils/offset_utils.hpp
index 1ad89c4fac..bbd384125d 100644
--- a/dpctl/tensor/libtensor/include/utils/offset_utils.hpp
+++ b/dpctl/tensor/libtensor/include/utils/offset_utils.hpp
@@ -27,6 +27,8 @@
 #pragma once
 
 #include <algorithm>
+#include <exception>
+#include <iostream>
 #include <sycl/sycl.hpp>
 #include <tuple>
 #include <vector>
@@ -81,6 +83,43 @@ std::vector<T, A> concat(std::vector<T, A> lhs, Vs &&...vs)
 
 } // namespace detail
 
+// USM-host allocator whose `deallocate` reports, rather than propagates,
+// exceptions derived from `std::exception`.
+//
+// Wraps `sycl::usm_allocator<T, sycl::usm::alloc::host>` and inherits its
+// constructors, so it is constructed the same way as the base allocator
+// (e.g. from a `sycl::queue`, as `device_allocate_and_pack` does).
+template <typename T>
+class usm_host_allocator : public sycl::usm_allocator<T, sycl::usm::alloc::host>
+{
+public:
+    using baseT = sycl::usm_allocator<T, sycl::usm::alloc::host>;
+    using baseT::baseT;
+
+    // Rebinding must produce this wrapper type so that containers which
+    // rebind their allocator keep the non-propagating `deallocate`.
+    template <typename U> struct rebind
+    {
+        using other = usm_host_allocator<U>;
+    };
+
+    // Releases `ptr` via the base allocator. A `std::exception` thrown by
+    // the base `deallocate` is written to `std::cerr` and swallowed;
+    // exceptions of other types still propagate.
+    // NOTE(review): presumably done because deallocation usually runs from
+    // container destructors, where a throw would terminate - confirm.
+    void deallocate(T *ptr, size_t n)
+    {
+        try {
+            baseT::deallocate(ptr, n);
+        } catch (const std::exception &e) {
+            std::cerr
+                << "Exception caught in `usm_host_allocator::deallocate`: "
+                << e.what() << std::endl;
+        }
+    }
+};
+
 template <typename indT, typename... Vs>
 std::tuple<indT *, size_t, sycl::event>
 device_allocate_and_pack(sycl::queue &q,
@@ -90,8 +129,7 @@ device_allocate_and_pack(sycl::queue &q,
 
     // memory transfer optimization, use USM-host for temporary speeds up
     // transfer to device, especially on dGPUs
-    using usm_host_allocatorT =
-        sycl::usm_allocator<indT, sycl::usm::alloc::host>;
+    using usm_host_allocatorT = usm_host_allocator<indT>;
     using shT = std::vector<indT, usm_host_allocatorT>;
 
     usm_host_allocatorT usm_host_allocator(q);
diff --git a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp
index 56db97eab7..77ec075ccf 100644
--- a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp
+++ b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp
@@ -35,6 +35,7 @@
 #include "dpctl4pybind11.hpp"
 #include "kernels/integer_advanced_indexing.hpp"
 #include "utils/memory_overlap.hpp"
+#include "utils/offset_utils.hpp"
 #include "utils/output_validation.hpp"
 #include "utils/type_dispatch.hpp"
 #include "utils/type_utils.hpp"
@@ -91,7 +92,7 @@ _populate_kernel_params(sycl::queue &exec_q,
 {
 
     using usm_host_allocator_T =
-        sycl::usm_allocator<char *, sycl::usm::alloc::host>;
+        dpctl::tensor::offset_utils::usm_host_allocator<char *>;
     using ptrT = std::vector<char *, usm_host_allocator_T>;
 
     usm_host_allocator_T ptr_allocator(exec_q);
@@ -99,7 +100,7 @@ _populate_kernel_params(sycl::queue &exec_q,
         std::make_shared<ptrT>(k, ptr_allocator);
 
     using usm_host_allocatorT =
-        sycl::usm_allocator<py::ssize_t, sycl::usm::alloc::host>;
+        dpctl::tensor::offset_utils::usm_host_allocator<py::ssize_t>;
     using shT = std::vector<py::ssize_t, usm_host_allocatorT>;
 
     usm_host_allocatorT sz_allocator(exec_q);
diff --git a/dpctl/tensor/libtensor/source/triul_ctor.cpp b/dpctl/tensor/libtensor/source/triul_ctor.cpp
index 5c6f21c6f7..4f70e27e10 100644
--- a/dpctl/tensor/libtensor/source/triul_ctor.cpp
+++ b/dpctl/tensor/libtensor/source/triul_ctor.cpp
@@ -32,6 +32,7 @@
 #include "kernels/constructors.hpp"
 #include "simplify_iteration_space.hpp"
 #include "utils/memory_overlap.hpp"
+#include "utils/offset_utils.hpp"
 #include "utils/output_validation.hpp"
 #include "utils/type_dispatch.hpp"
 
@@ -150,7 +151,7 @@ usm_ndarray_triul(sycl::queue &exec_q,
     nd += 2;
 
     using usm_host_allocatorT =
-        sycl::usm_allocator<py::ssize_t, sycl::usm::alloc::host>;
+        dpctl::tensor::offset_utils::usm_host_allocator<py::ssize_t>;
     using usmshT = std::vector<py::ssize_t, usm_host_allocatorT>;
 
     usm_host_allocatorT allocator(exec_q);
diff --git a/dpctl/tests/test_usm_ndarray_print.py b/dpctl/tests/test_usm_ndarray_print.py
index 9e15fa3310..983cb75d98 100644
--- a/dpctl/tests/test_usm_ndarray_print.py
+++ b/dpctl/tests/test_usm_ndarray_print.py
@@ -283,7 +283,6 @@ def test_print_repr(self):
         x = dpt.arange(4, dtype="i4", sycl_queue=q)
         x.sycl_queue.wait()
         r = repr(x)
-        print(r)
         assert r == "usm_ndarray([0, 1, 2, 3], dtype=int32)"
 
         dpt.set_print_options(linewidth=1)