From 87d5ab3736de92904d6714b13ca5dcb9b0963aef Mon Sep 17 00:00:00 2001 From: "Wang, Eikan" Date: Mon, 25 May 2020 10:24:08 +0800 Subject: [PATCH 1/3] Add AVX512 macro in CMake to enable AVX512 --- cmake/CPU.cmake | 7 ++++--- torch_ipex/csrc/cpu/bf16/Converter.cpp | 6 ++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/cmake/CPU.cmake b/cmake/CPU.cmake index 8ae17fb3e..72693d419 100644 --- a/cmake/CPU.cmake +++ b/cmake/CPU.cmake @@ -17,7 +17,7 @@ list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules) FIND_PACKAGE(AVX) -IF (NOT C_AVX512_FOUND) +IF (NOT C_AVX512_FOUND AND NOT CXX_AVX512_FOUND) message(FATAL_ERROR "Please build IPEX on Machines that support AVX512.") ENDIF() @@ -58,13 +58,14 @@ endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pedantic") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=redundant-decls") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=old-style-cast") -IF (C_AVX512_FOUND) +IF (C_AVX512_FOUND OR CXX_AVX512_FOUND) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DAVX512") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512bw") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512vl") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c") ENDIF() -IF (C_AVX512_BF16_FOUND) +IF (C_AVX512_BF16_FOUND OR CXX_AVX512_BF16_FOUND) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512bf16 -DAVX512_BF16") ENDIF() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") diff --git a/torch_ipex/csrc/cpu/bf16/Converter.cpp b/torch_ipex/csrc/cpu/bf16/Converter.cpp index 97ebfef75..3819a33d6 100644 --- a/torch_ipex/csrc/cpu/bf16/Converter.cpp +++ b/torch_ipex/csrc/cpu/bf16/Converter.cpp @@ -1,5 +1,7 @@ #include "Converter.h" +#include + #if defined(AVX512) #include "vec/vec_type_cvt.h" #define BF16_2_FP32(dst, src, len) cvt_bf16_to_fp32(dst, src, len) @@ -15,11 +17,11 @@ namespace bf16 { namespace converter { void bf16_to_fp32(void *dst, const void *src, int len) { - BF16_2_FP32(dst, src, len);
+ BF16_2_FP32((float *)dst, (at::BFloat16 *)src, len); } void fp32_to_bf16(void *dst, const void *src, int len) { - FP32_2_BF16(dst, src, len); + FP32_2_BF16((at::BFloat16 *)dst, (float *)src, len); } } // namespace converter From a4d5402d03befd64d8d8ead2a55ef679ff2308ec Mon Sep 17 00:00:00 2001 From: "Wang, Eikan" Date: Tue, 26 May 2020 14:01:50 +0800 Subject: [PATCH 2/3] Cannot use the input dil tensor to check is_public_format or not because it is out of scope --- torch_ipex/csrc/cpu/dbl/Common.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/torch_ipex/csrc/cpu/dbl/Common.cpp b/torch_ipex/csrc/cpu/dbl/Common.cpp index 07c55fbd7..3be05955d 100644 --- a/torch_ipex/csrc/cpu/dbl/Common.cpp +++ b/torch_ipex/csrc/cpu/dbl/Common.cpp @@ -54,10 +54,10 @@ at::Tensor gen_aten_tensor_by(dil::tensor&& dil_tensor) { shade_data_context->dil_tensor = std::forward(dil_tensor); shade_data_context->data_type = cpu::SHADE_DATA_TYPE::DIL; void *tensor_data = nullptr; - if (dil_tensor.is_public_format()) { + if (shade_data_context->dil_tensor->is_public_format()) { // The buffer of a tensor with public format is shared between CPU and DNNL - tensor_data = dil_tensor.get_data_handle(); - shade_data_context->cpu_raw_data = dil_tensor.get_data_handle(); + tensor_data = shade_data_context->dil_tensor->get_data_handle(); + shade_data_context->cpu_raw_data = shade_data_context->dil_tensor->get_data_handle(); shade_data_context->cpu_del_fun = &(c10::detail::deleteNothing); } c10::DataPtr shade_data_ptr( @@ -65,15 +65,15 @@ at::Tensor gen_aten_tensor_by(dil::tensor&& dil_tensor) { shade_data_context, cpu::ShadeDataContext::freeShadeDataContext, at::DeviceType::DPCPP); - auto at_data_type = get_at_data_type(dil_tensor.get_data_type()); + auto at_data_type = get_at_data_type(shade_data_context->dil_tensor->get_data_type()); auto storage_impl = c10::make_intrusive( at::scalarTypeToTypeMeta(at_data_type), - dil_tensor.get_nelems(), + shade_data_context->dil_tensor->get_nelems(),
std::move(shade_data_ptr), nullptr, /*resizeable=*/false); auto _tensor = at::detail::make_tensor(storage_impl, at::DispatchKey::DPCPPTensorId); - dbl::comm::sync_shape_from_dil_to_aten(_tensor, dil_tensor); + dbl::comm::sync_shape_from_dil_to_aten(_tensor, shade_data_context->dil_tensor.value()); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(_tensor.layout() == c10::kStrided); return _tensor; } From 544ea2d2a6bff0a4acaf67559bd4212927e53987 Mon Sep 17 00:00:00 2001 From: "Wang, Eikan" Date: Thu, 28 May 2020 14:59:00 +0800 Subject: [PATCH 3/3] Fix build issue of PR #20 --- torch_ipex/csrc/cpu/DevOPs.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torch_ipex/csrc/cpu/DevOPs.cpp b/torch_ipex/csrc/cpu/DevOPs.cpp index 63e2de470..ebe231f42 100644 --- a/torch_ipex/csrc/cpu/DevOPs.cpp +++ b/torch_ipex/csrc/cpu/DevOPs.cpp @@ -594,9 +594,9 @@ at::Tensor AtenIpexCPUDev::dil_linear_fuse_relu( output_size.push_back(weight.size(0)); if (self.dim() > 2) { - return dbl::comm::gen_aten_tensor_by(y).reshape(output_size); + return dbl::comm::gen_aten_tensor_by(std::move(y)).reshape(output_size); } - return dbl::comm::gen_aten_tensor_by(y); + return dbl::comm::gen_aten_tensor_by(std::move(y)); } at::Tensor dil_linear_backward_input( @@ -1036,7 +1036,7 @@ at::Tensor AtenIpexCPUDev::dil_relu_use_dst_for_bwd(const at::Tensor& grad_output, dil::tensor gradx; dil::eltwise_backward::compute(y, grady, gradx, dil::algorithm::eltwise_relu_use_dst_for_bwd, /*alpha*/ 0.0); - return dbl::comm::gen_aten_tensor_by(gradx); + return dbl::comm::gen_aten_tensor_by(std::move(gradx)); } at::Tensor AtenIpexCPUDev::dil_threshold_backward(const at::Tensor& grad_output, const at::Tensor& input, at::Scalar threshold) {