diff --git a/CMakeLists.txt b/CMakeLists.txt index 46e988bdcbe6..ccb2bef6972e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,7 +80,7 @@ set(CYTHON_FLAGS "-t -w \"${CMAKE_SOURCE_DIR}\"") find_package(Cython REQUIRED) find_package(Dpctl REQUIRED) -message(STATUS "Dpctl_INCLUDE_DIRS=" ${Dpctl_INCLUDE_DIRS}) +message(STATUS "Dpctl_INCLUDE_DIR=" ${Dpctl_INCLUDE_DIR}) message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR}) if(WIN32) diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt index ec4161b81f3a..0cc04dbd0f4c 100644 --- a/dpnp/backend/CMakeLists.txt +++ b/dpnp/backend/CMakeLists.txt @@ -107,6 +107,8 @@ target_compile_definitions(${_trgt} PUBLIC PSTL_USE_PARALLEL_POLICIES=0) target_compile_definitions(${_trgt} PUBLIC ONEDPL_USE_PREDEFINED_POLICIES=0) target_include_directories(${_trgt} PUBLIC ${Dpctl_INCLUDE_DIR}) +target_include_directories(${_trgt} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) + target_link_directories(${_trgt} PUBLIC "${Dpctl_INCLUDE_DIR}/..") target_link_libraries(${_trgt} PUBLIC DPCTLSyclInterface) diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt index 341845aa2db6..0cfdc1a16773 100644 --- a/dpnp/backend/extensions/lapack/CMakeLists.txt +++ b/dpnp/backend/extensions/lapack/CMakeLists.txt @@ -52,7 +52,7 @@ set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDEN target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) if (WIN32) diff --git a/dpnp/backend/extensions/sycl_ext/CMakeLists.txt b/dpnp/backend/extensions/sycl_ext/CMakeLists.txt index 64aedd496b08..6b1c76076d54 100644 --- a/dpnp/backend/extensions/sycl_ext/CMakeLists.txt +++ b/dpnp/backend/extensions/sycl_ext/CMakeLists.txt @@ -45,7 +45,7 @@ set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDEN target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) if (WIN32) diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 47347eb54be0..03a5038ebb82 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -45,7 +45,7 @@ set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDEN target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) if (WIN32) diff --git a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp index 24ee5c95c309..9db8425f6de4 100644 --- a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp @@ -32,6 +32,12 @@ #include "dpnpc_memory_adapter.hpp" #include "queue_sycl.hpp" +// dpctl tensor headers +#include "kernels/alignment.hpp" + +using dpctl::tensor::kernels::alignment_utils::is_aligned; +using dpctl::tensor::kernels::alignment_utils::required_alignment; + template class dpnp_invert_c_kernel; @@ -67,7 +73,10 @@ DPCTLSyclEventRef dpnp_invert_c(DPCTLSyclQueueRef q_ref, vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); - if (start + static_cast(vec_sz) * max_sg_size < size) { + if (is_aligned(input_data) && + is_aligned(result) && + (start + static_cast(vec_sz) * max_sg_size < size)) + { auto input_multi_ptr = sycl::address_space_cast< sycl::access::address_space::global_space, sycl::access::decorated::yes>(&input_data[start]); diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 58186ba482e5..ac07698ebfeb 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -34,6 +34,12 @@ #include "dpnpc_memory_adapter.hpp" #include "queue_sycl.hpp" +// dpctl tensor headers +#include "kernels/alignment.hpp" + +using dpctl::tensor::kernels::alignment_utils::is_aligned; +using dpctl::tensor::kernels::alignment_utils::required_alignment; + #define MACRO_1ARG_2TYPES_OP(__name__, __operation1__, __operation2__) \ template \ @@ -1198,8 +1204,12 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap) (nd_it.get_group(0) * nd_it.get_local_range(0) + \ sg.get_group_id()[0] * max_sg_size); \ \ - if (start + static_cast(vec_sz) * max_sg_size < \ - result_size) { \ + if (is_aligned(input1_data) && \ + is_aligned(input2_data) && \ + is_aligned(result) && \ + (start + static_cast(vec_sz) * max_sg_size < \ + result_size)) \ + { \ auto input1_multi_ptr = sycl::address_space_cast< \ sycl::access::address_space::global_space, \ sycl::access::decorated::yes>( \ diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp index 00e60890c641..0174b47339a8 100644 --- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp @@ -31,6 +31,12 @@ #include "dpnpc_memory_adapter.hpp" #include "queue_sycl.hpp" +// dpctl tensor headers +#include "kernels/alignment.hpp" + +using dpctl::tensor::kernels::alignment_utils::is_aligned; +using dpctl::tensor::kernels::alignment_utils::required_alignment; + template class dpnp_all_c_kernel; @@ -610,8 +616,12 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef, vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ sg.get_group_id()[0] * max_sg_size); \ \ - if (start + static_cast(vec_sz) * max_sg_size < \ - result_size) { \ + if (is_aligned(input1_data) && \ + is_aligned(input2_data) && \ + is_aligned(result) && \ + (start + static_cast(vec_sz) * max_sg_size < \ + result_size)) \ + { \ auto input1_multi_ptr = sycl::address_space_cast< \ sycl::access::address_space::global_space, \ sycl::access::decorated::yes>(&input1_data[start]); \ diff --git a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp index e962f8842246..d80ccfa186e0 100644 --- a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp @@ -35,6 +35,12 @@ #include "dpnpc_memory_adapter.hpp" #include "queue_sycl.hpp" +// dpctl tensor headers +#include "kernels/alignment.hpp" + +using dpctl::tensor::kernels::alignment_utils::is_aligned; +using dpctl::tensor::kernels::alignment_utils::required_alignment; + static_assert(__SYCL_COMPILER_VERSION >= __SYCL_COMPILER_VECTOR_ABS_CHANGED, "SYCL DPC++ compiler does not meet minimum version requirement"); @@ -163,7 +169,10 @@ DPCTLSyclEventRef vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); - if (start + static_cast(vec_sz) * max_sg_size < size) { + if (is_aligned(array1) && + is_aligned(result) && + (start + static_cast(vec_sz) * max_sg_size < size)) + { auto array_multi_ptr = sycl::address_space_cast< sycl::access::address_space::global_space, sycl::access::decorated::yes>(&array1[start]);