From 2c74d116869972a1fa112a4b1314177ca547c5b7 Mon Sep 17 00:00:00 2001
From: "Wang, Eikan"
Date: Thu, 7 May 2020 21:46:36 +0800
Subject: [PATCH 1/3] Add data type reorder for DNNL OP

---
 torch_ipex/csrc/aten_ipex_bridge.cpp   | 58 +++++++++++++++++++-------
 torch_ipex/csrc/aten_ipex_bridge.h     | 34 ++++++++++++++-
 torch_ipex/csrc/cpu/ShadeDataContext.h |  1 +
 torch_ipex/csrc/ipex_tensor_impl.cpp   |  4 ++
 torch_ipex/csrc/ipex_tensor_impl.h     |  1 +
 torch_ipex/csrc/utils.cpp              | 14 ++++++-
 torch_ipex/csrc/utils.h                |  3 +-
 7 files changed, 97 insertions(+), 18 deletions(-)

diff --git a/torch_ipex/csrc/aten_ipex_bridge.cpp b/torch_ipex/csrc/aten_ipex_bridge.cpp
index 923e11ea1..b3b4d0bc0 100644
--- a/torch_ipex/csrc/aten_ipex_bridge.cpp
+++ b/torch_ipex/csrc/aten_ipex_bridge.cpp
@@ -50,7 +50,7 @@ namespace bridge {
 at::Tensor shallowFallbackToCPUTensorImpl(const at::Tensor& ipexTensor);
 
-void reorderDilTensor(const at::Tensor& ipexTensor) {
+void reorderDilTensorToPublic(const at::Tensor& ipexTensor) {
   void *data_ctx = ipexTensor.unsafeGetTensorImpl()->storage().data_ptr().get_context();
   cpu::ShadeDataContext *shade_data_context = (cpu::ShadeDataContext*)data_ctx;
   // All aten::tensor with dnnl::tensor should be contiguous
@@ -89,12 +89,11 @@ void reorderDilTensor(const at::Tensor& ipexTensor) {
 void attachShadeDataConext(const at::Tensor& tensor) {
   auto tensor_storage_impl = tensor.storage().unsafeGetStorageImpl();
   auto& data_ptr = tensor_storage_impl->data_ptr();
-  // [NOTE]: We assume the real data of storage should be as same as its context.
-  //         Then we use the assumption to check if current tensor has contained
-  //         shade data context.
-  if (data_ptr.get() != data_ptr.get_context()) {
+
+  // Already contains a shade data context
+  if (check_tensor_own_shade_context(tensor))
     return;
-  }
+
   auto cur_del_fn = data_ptr.get_deleter();
   bool res = data_ptr.compare_exchange_deleter(cur_del_fn, &(c10::detail::deleteNothing));
   TORCH_INTERNAL_ASSERT(res);
@@ -189,7 +188,7 @@ at::Tensor shallowFallbackToCPUTensor(const at::Tensor& ipexTensor) {
     cpu::ShadeDataContext *shade_data_context = (cpu::ShadeDataContext*)data_ctx;
     // Branch 2.1: Dense + Dil Tensor
     if (cpu::ShadeDataContext::isDilTensor(ipexTensor)) {
-      reorderDilTensor(ipexTensor);
+      reorderDilTensorToPublic(ipexTensor);
     }
 
     // Branch 2.2: Dense + CPU Tensor
@@ -496,24 +495,52 @@ std::vector<at::Tensor> shallowFallbackToCPUTensorList(const at::TensorList& ten
   return dpcpp_tensor_vec;
 }
 
-void cvtTensorToScalaraType(const at::Tensor& ipexTensor, at::ScalarType dstScalarType) {
+
+void reorderTensorToScalarTypeForDNNL(const at::Tensor& ipexTensor, at::ScalarType dstScalarType) {
+  if (ipexTensor.device().type() == at::DeviceType::CPU) {
+    return reorderTensorToScalaraType(ipexTensor, dstScalarType);
+  }
+
+  TORCH_CHECK(dstScalarType == at::kBFloat16 || dstScalarType == at::kFloat);
+  auto tensor_dtype = ipexTensor.scalar_type();
+  TORCH_CHECK(tensor_dtype == at::kBFloat16 || tensor_dtype == at::kFloat);
+  if (tensor_dtype == dstScalarType)
+    return;
+
+  if (check_tensor_own_shade_context(ipexTensor)) {
+    // Shade data context has been attached
+    if (cpu::ShadeDataContext::isDilTensor(ipexTensor)) {
+      cpu::ShadeDataContext *shade_context = (cpu::ShadeDataContext*)(ipexTensor.storage().data_ptr().get_context());
+      shade_context->dil_tensor.to_type(get_dil_data_type(dstScalarType));
+      IPEXTensorImpl* ipex_tensor_impl = (IPEXTensorImpl *)ipexTensor.unsafeGetTensorImpl();
+      ipex_tensor_impl->reset_data_type(dstScalarType);
+      ipex_tensor_impl->storage().unsafeGetStorageImpl()->set_dtype(at::scalarTypeToTypeMeta(dstScalarType));
+      return;
+    }
+  }
+
+  return reorderTensorToScalaraType(ipexTensor, dstScalarType);
+}
+
+
+void reorderTensorToScalaraType(const at::Tensor& ipexTensor, at::ScalarType dstScalarType) {
   if (!(ipexTensor.defined()))
     return;
 
   TORCH_CHECK(dstScalarType == at::kBFloat16 || dstScalarType == at::kFloat);
-  if (ipexTensor.scalar_type() == dstScalarType)
+
+  auto tensor_dtype = ipexTensor.scalar_type();
+  TORCH_CHECK(tensor_dtype == at::kBFloat16 || tensor_dtype == at::kFloat);
+  if (tensor_dtype == dstScalarType)
     return;
 
-  if (check_data_is_part_of_storage(ipexTensor))
+  if (!check_tensor_own_whole_storage(ipexTensor))
     return;
 
-  void *data_ptr = ipexTensor.unsafeGetTensorImpl()->storage().data_ptr().get();
-  void *data_ctx = ipexTensor.unsafeGetTensorImpl()->storage().data_ptr().get_context();
-  if ((data_ptr != data_ctx) && (data_ctx != nullptr)) {
+  if (check_tensor_own_shade_context(ipexTensor)) {
     // Shade data context has been attached
-    cpu::ShadeDataContext *shade_data_context = (cpu::ShadeDataContext*)data_ctx;
     if (cpu::ShadeDataContext::isDilTensor(ipexTensor)) {
-      reorderDilTensor(ipexTensor);
+      reorderDilTensorToPublic(ipexTensor);
     }
   }
@@ -528,6 +555,7 @@ void cvtTensorToScalaraType(const at::Tensor& ipexTensor, at::ScalarType dstScal
     allocator,
     /*resizeable=*/true);
 
+  void *data_ptr = ipexTensor.unsafeGetTensorImpl()->storage().data_ptr().get();
   if (dstScalarType == at::kBFloat16) {
     torch_ipex::cpu::bf16::converter::fp32_to_bf16(storage_impl->data_ptr().get(), data_ptr, nelements);
   } else {
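
Note on the fallback path above: when the descriptor-only fast path cannot be taken, the data is really converted, ending in the torch_ipex::cpu::bf16::converter::fp32_to_bf16 call. The converter itself is not part of this patch; the following standalone sketch (not ipex code) shows the kind of bit-level narrowing such a routine typically performs, assuming round-to-nearest-even rounding, which is not confirmed by the patch:

// Standalone illustration: fp32 -> bf16 narrowing with round-to-nearest-even.
// NaN handling is omitted for brevity.
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint16_t fp32_to_bf16_rne(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));               // bit-exact view of the float
  uint32_t bias = 0x7FFFu + ((bits >> 16) & 1u);      // ties round to the even mantissa
  return static_cast<uint16_t>((bits + bias) >> 16);  // keep sign, exponent, top 7 mantissa bits
}

int main() {
  const float src[] = {1.0f, 3.1415926f, -2.7182817f};
  for (float v : src)
    std::printf("%+.7f -> 0x%04x\n", v, fp32_to_bf16_rne(v));
  return 0;
}
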
diff --git a/torch_ipex/csrc/aten_ipex_bridge.h b/torch_ipex/csrc/aten_ipex_bridge.h
index cbcaf33f0..02ce9822d 100644
--- a/torch_ipex/csrc/aten_ipex_bridge.h
+++ b/torch_ipex/csrc/aten_ipex_bridge.h
@@ -16,7 +16,39 @@ std::vector<at::Tensor> fallbackToCPUTensorList(const at::TensorList&);
 std::vector<at::Tensor> shallowFallbackToCPUTensorList(const at::TensorList&);
 void attachShadeDataConext(const at::Tensor& tensor);
-void cvtTensorToScalaraType(const at::Tensor& ipexTensor, at::ScalarType dstScalarType);
+
+/**
+ * Reorder the DNNL tensor to the public format if the input tensor contains a DNNL tensor.
+ *
+ * @param[in] ipexTensor The ipex tensor whose underlying DNNL tensor will be reordered to the public format
+ */
+void reorderDilTensorToPublic(const at::Tensor& ipexTensor);
+
+/**
+ * Reorder the input tensor to the specified scalar type. This is an optimized version for
+ * DNNL OPs: if DNNL supports the current OP, you should call this API. Otherwise, you
+ * should call @sa @ref reorderTensorToScalaraType
+ *
+ * @param[in] ipexTensor The input ipex tensor to be reordered to the specified scalar type
+ * @param[in] dstScalarType The scalar type which the input ipex tensor will be reordered to. It should
+ *       be at::kBFloat16 or at::kFloat
+ *
+ * @note
+ *   If the input aten tensor is a DNNL tensor and DNNL supports the current OP, we only need
+ *   to set the data type of the DNNL tensor descriptor to the specified scalar type. This
+ *   avoids a memory copy and improves performance. We also need to reset the type meta of the
+ *   IPEXTensorImpl and StorageImpl to the type meta corresponding to the specified scalar type.
+ */
+void reorderTensorToScalarTypeForDNNL(const at::Tensor& ipexTensor, at::ScalarType dstScalarType);
+
+/**
+ * Reorder the input tensor to the specified scalar type.
+ *
+ * @param[in] ipexTensor The input ipex tensor to be reordered to the specified scalar type
+ * @param[in] dstScalarType The scalar type which the input ipex tensor will be reordered to. It should
+ *       be at::kBFloat16 or at::kFloat
+ */
+void reorderTensorToScalaraType(const at::Tensor& ipexTensor, at::ScalarType dstScalarType);
 
 // Convert CPU tensor to DPCPP tensor
 at::Tensor upgradeToDPCPPTensor(const at::Tensor& ipexTensor);
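
The split into two entry points is the core of this header change. As a reading aid only, here is a hypothetical call-site sketch: the Tensor type, the dnnl_supports_op helper, and both function bodies below are stand-ins rather than ipex code. It shows the intended dispatch, where the DNNL-aware variant may satisfy the request by resetting only the descriptor's data type while the generic variant always reorders the underlying data:

#include <iostream>

enum class ScalarType { Float, BFloat16 };
struct Tensor { ScalarType dtype; };

// Stand-ins that mimic the two APIs declared above.
void reorderTensorToScalarTypeForDNNL(Tensor& t, ScalarType dst) {
  t.dtype = dst;  // descriptor-only change, no data copy
  std::cout << "DNNL path: descriptor dtype reset\n";
}
void reorderTensorToScalaraType(Tensor& t, ScalarType dst) {
  t.dtype = dst;  // stands in for a real conversion copy
  std::cout << "generic path: data reordered\n";
}

bool dnnl_supports_op(const char* /*op_name*/) { return true; }  // assumed helper

int main() {
  Tensor input{ScalarType::Float};
  if (dnnl_supports_op("convolution"))
    reorderTensorToScalarTypeForDNNL(input, ScalarType::BFloat16);
  else
    reorderTensorToScalaraType(input, ScalarType::BFloat16);
  return 0;
}
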
diff --git a/torch_ipex/csrc/cpu/ShadeDataContext.h b/torch_ipex/csrc/cpu/ShadeDataContext.h
index 4b669dcf7..634b5df49 100644
--- a/torch_ipex/csrc/cpu/ShadeDataContext.h
+++ b/torch_ipex/csrc/cpu/ShadeDataContext.h
@@ -87,6 +87,7 @@ struct ShadeDataContext {
     void *storage_context = tensor.storage().data_ptr().get_context();
     ShadeDataContext *shade_data_context = (ShadeDataContext*)storage_context;
     auto data_type = shade_data_context->data_type;
+    TORCH_INTERNAL_ASSERT((data_type == SHADE_DATA_TYPE::CPU_RAW) || (data_type == SHADE_DATA_TYPE::DIL));
 
     if (data_type == SHADE_DATA_TYPE::DIL) {
       TORCH_WARN(tensor.is_contiguous());
diff --git a/torch_ipex/csrc/ipex_tensor_impl.cpp b/torch_ipex/csrc/ipex_tensor_impl.cpp
index f72d462f0..851693956 100644
--- a/torch_ipex/csrc/ipex_tensor_impl.cpp
+++ b/torch_ipex/csrc/ipex_tensor_impl.cpp
@@ -64,6 +64,10 @@ void IPEXTensorImpl::set_dpcpp_tensor_id() {
   this->key_set_.add(at::DispatchKey::VariableTensorId);
 }
 
+void IPEXTensorImpl::reset_data_type(at::ScalarType dst_type) {
+  this->data_type_ = at::scalarTypeToTypeMeta(dst_type);
+}
+
 void IPEXTensorImpl::copy_auto_grad(c10::TensorImpl *src_impl) {
   if (! src_impl->requires_grad()) {
     TORCH_INTERNAL_ASSERT(! this->requires_grad());
diff --git a/torch_ipex/csrc/ipex_tensor_impl.h b/torch_ipex/csrc/ipex_tensor_impl.h
index a3cd2b2fa..532493882 100644
--- a/torch_ipex/csrc/ipex_tensor_impl.h
+++ b/torch_ipex/csrc/ipex_tensor_impl.h
@@ -24,6 +24,7 @@ class IPEXTensorImpl : public c10::TensorImpl {
   void set_storage_data_ptr(c10::DataPtr);
   void set_dpcpp_tensor_id();
   void force_set_strided(at::IntArrayRef size, at::IntArrayRef stride /*, optional storage_offset_*/);
+  void reset_data_type(at::ScalarType dst_type);
 
   c10::Storage& get_storage() {
     return this->storage_;
diff --git a/torch_ipex/csrc/utils.cpp b/torch_ipex/csrc/utils.cpp
index 6535bfe26..0477f3b8e 100644
--- a/torch_ipex/csrc/utils.cpp
+++ b/torch_ipex/csrc/utils.cpp
@@ -104,7 +104,7 @@ bool check_auto_dnnl() {
   return AutoOptConfig::singleton().get_auto_dnnl();
 }
 
-bool check_data_is_part_of_storage(const at::Tensor& tensor) {
+bool check_tensor_own_whole_storage(const at::Tensor& tensor) {
   if (!(tensor.defined()))
     return false;
 
@@ -112,4 +112,16 @@
          (tensor.numel() == tensor.storage().numel());
 }
 
+bool check_tensor_own_shade_context(const at::Tensor& tensor) {
+  if (!(tensor.defined()))
+    return false;
+
+  // [NOTE]: We assume the real data of the storage is the same as its context.
+  //         Then we use this assumption to check whether the current tensor
+  //         already carries a shade data context.
+  void *data_ptr = tensor.unsafeGetTensorImpl()->storage().data_ptr().get();
+  void *data_ctx = tensor.unsafeGetTensorImpl()->storage().data_ptr().get_context();
+  return (data_ptr != data_ctx) && (data_ctx != nullptr);
+}
+
 } // namespace torch_ipex
diff --git a/torch_ipex/csrc/utils.h b/torch_ipex/csrc/utils.h
index 809ad7f5e..0e3ccfebb 100644
--- a/torch_ipex/csrc/utils.h
+++ b/torch_ipex/csrc/utils.h
@@ -18,6 +18,7 @@ bool get_device_count(c10::Device dev_type, c10::DeviceIndex *count);
 dil::data_type get_dil_data_type(at::ScalarType);
 at::ScalarType get_at_data_type(dil::data_type);
 bool check_auto_dnnl();
-bool check_data_is_part_of_storage(const at::Tensor& tensor);
+bool check_tensor_own_whole_storage(const at::Tensor& tensor);
+bool check_tensor_own_shade_context(const at::Tensor& tensor);
 
 } // namespace torch_ipex
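
Before the follow-up patches, it may help to spell out what check_tensor_own_whole_storage guards against, since patch 3 below builds on it. A tensor that starts at a nonzero storage offset, or that covers fewer elements than its storage, is an alias (a view), and reordering its storage in place would also rewrite bytes that other views still interpret under the old layout. A minimal toy model of the predicate (not ipex code; the Storage and TensorView types are stand-ins):

#include <cstddef>
#include <iostream>

struct Storage { std::size_t numel; };

struct TensorView {
  Storage* storage;
  std::size_t storage_offset;
  std::size_t numel;
};

// Mirrors the real check: zero offset and element count equal to the storage's.
bool check_tensor_own_whole_storage(const TensorView& t) {
  return t.storage != nullptr &&
         t.storage_offset == 0 &&
         t.numel == t.storage->numel;
}

int main() {
  Storage buf{16};
  TensorView whole{&buf, 0, 16};  // owns the entire storage
  TensorView slice{&buf, 4, 8};   // alias: a window into the same storage
  std::cout << check_tensor_own_whole_storage(whole) << '\n';  // 1
  std::cout << check_tensor_own_whole_storage(slice) << '\n';  // 0
  return 0;
}
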
From 1206fe2082aaf6e89e7cd0e64ee762312953fedb Mon Sep 17 00:00:00 2001
From: "Wang, Eikan"
Date: Mon, 11 May 2020 10:19:38 +0800
Subject: [PATCH 2/3] Remove redundant condition for reordering a tensor to a
 specified scalar type

---
 torch_ipex/csrc/aten_ipex_bridge.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/torch_ipex/csrc/aten_ipex_bridge.cpp b/torch_ipex/csrc/aten_ipex_bridge.cpp
index b3b4d0bc0..2caca3c40 100644
--- a/torch_ipex/csrc/aten_ipex_bridge.cpp
+++ b/torch_ipex/csrc/aten_ipex_bridge.cpp
@@ -497,10 +497,6 @@ std::vector<at::Tensor> shallowFallbackToCPUTensorList(const at::TensorList& ten
 
 
 void reorderTensorToScalarTypeForDNNL(const at::Tensor& ipexTensor, at::ScalarType dstScalarType) {
-  if (ipexTensor.device().type() == at::DeviceType::CPU) {
-    return reorderTensorToScalaraType(ipexTensor, dstScalarType);
-  }
-
   TORCH_CHECK(dstScalarType == at::kBFloat16 || dstScalarType == at::kFloat);
   auto tensor_dtype = ipexTensor.scalar_type();
   TORCH_CHECK(tensor_dtype == at::kBFloat16 || tensor_dtype == at::kFloat);

From 5be717ad8e94f8205be42532921c31b7d80ca757 Mon Sep 17 00:00:00 2001
From: "Wang, Eikan"
Date: Mon, 11 May 2020 21:18:40 +0800
Subject: [PATCH 3/3] Add an assert because reorder does not support alias
 tensors yet.

---
 torch_ipex/csrc/aten_ipex_bridge.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/torch_ipex/csrc/aten_ipex_bridge.cpp b/torch_ipex/csrc/aten_ipex_bridge.cpp
index 2caca3c40..d3039ccbd 100644
--- a/torch_ipex/csrc/aten_ipex_bridge.cpp
+++ b/torch_ipex/csrc/aten_ipex_bridge.cpp
@@ -530,8 +530,11 @@ void reorderTensorToScalaraType(const at::Tensor& ipexTensor, at::ScalarType dst
   if (tensor_dtype == dstScalarType)
     return;
 
-  if (!check_tensor_own_whole_storage(ipexTensor))
+  if (!check_tensor_own_whole_storage(ipexTensor)) {
+    // Alias tensors (views over part of a storage) are not supported yet.
+    TORCH_INTERNAL_ASSERT(false);
     return;
+  }
 
   if (check_tensor_own_shade_context(ipexTensor)) {
     // Shade data context has been attached
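
Finally, the shade-context test that patch 1 introduced in utils.cpp relies on a pointer convention: for a plain CPU allocation, the c10::DataPtr stores the data pointer itself as the deleter context, whereas ipex installs a separate ShadeDataContext object there. A toy model of that convention (not ipex code; the DataPtr and ShadeDataContext types below are simplified stand-ins):

#include <iostream>

struct ShadeDataContext { /* cpu_raw_data, dil_tensor, data_type, ... */ };

struct DataPtr {
  void* data;
  void* ctx;  // deleter context; equals `data` for plain CPU allocations
};

// Mirrors the real predicate: a distinct, non-null context marks a shade context.
bool check_tensor_own_shade_context(const DataPtr& p) {
  return (p.data != p.ctx) && (p.ctx != nullptr);
}

int main() {
  int payload = 0;
  ShadeDataContext shade;
  DataPtr plain_cpu{&payload, &payload};  // context == data: no shade context
  DataPtr shaded{&payload, &shade};       // context points at a ShadeDataContext
  std::cout << check_tensor_own_shade_context(plain_cpu) << '\n';  // 0
  std::cout << check_tensor_own_shade_context(shaded) << '\n';     // 1
  return 0;
}
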