From 2c74d116869972a1fa112a4b1314177ca547c5b7 Mon Sep 17 00:00:00 2001
From: "Wang, Eikan"
Date: Thu, 7 May 2020 21:46:36 +0800
Subject: [PATCH 1/3] Add data type reorder for DNNL OP

---
 torch_ipex/csrc/aten_ipex_bridge.cpp   | 58 +++++++++++++++++++-------
 torch_ipex/csrc/aten_ipex_bridge.h     | 34 ++++++++++++++-
 torch_ipex/csrc/cpu/ShadeDataContext.h |  1 +
 torch_ipex/csrc/ipex_tensor_impl.cpp   |  4 ++
 torch_ipex/csrc/ipex_tensor_impl.h     |  1 +
 torch_ipex/csrc/utils.cpp              | 14 ++++++-
 torch_ipex/csrc/utils.h                |  3 +-
 7 files changed, 97 insertions(+), 18 deletions(-)

diff --git a/torch_ipex/csrc/aten_ipex_bridge.cpp b/torch_ipex/csrc/aten_ipex_bridge.cpp
index 923e11ea1..b3b4d0bc0 100644
--- a/torch_ipex/csrc/aten_ipex_bridge.cpp
+++ b/torch_ipex/csrc/aten_ipex_bridge.cpp
@@ -50,7 +50,7 @@ namespace bridge {
 at::Tensor shallowFallbackToCPUTensorImpl(const at::Tensor& ipexTensor);
 
-void reorderDilTensor(const at::Tensor& ipexTensor) {
+void reorderDilTensorToPublic(const at::Tensor& ipexTensor) {
   void *data_ctx = ipexTensor.unsafeGetTensorImpl()->storage().data_ptr().get_context();
   cpu::ShadeDataContext *shade_data_context = (cpu::ShadeDataContext*)data_ctx;
   // All aten::tensor with dnnl::tensor should be contiguous
@@ -89,12 +89,11 @@ void reorderDilTensor(const at::Tensor& ipexTensor) {
 void attachShadeDataConext(const at::Tensor& tensor) {
   auto tensor_storage_impl = tensor.storage().unsafeGetStorageImpl();
   auto& data_ptr = tensor_storage_impl->data_ptr();
-  // [NOTE]: We assume the real data of storage should be as same as its context.
-  //         Then we use the assumption to check if current tensor has contained
-  //         shade data context.
-  if (data_ptr.get() != data_ptr.get_context()) {
+
+  // Already contains a shade data context
+  if (check_tensor_own_shade_context(tensor))
     return;
-  }
+
   auto cur_del_fn = data_ptr.get_deleter();
   bool res = data_ptr.compare_exchange_deleter(cur_del_fn, &(c10::detail::deleteNothing));
   TORCH_INTERNAL_ASSERT(res);
@@ -189,7 +188,7 @@ at::Tensor shallowFallbackToCPUTensor(const at::Tensor& ipexTensor) {
     cpu::ShadeDataContext *shade_data_context = (cpu::ShadeDataContext*)data_ctx;
     // Branch 2.1: Dense + Dil Tensor
     if (cpu::ShadeDataContext::isDilTensor(ipexTensor)) {
-      reorderDilTensor(ipexTensor);
+      reorderDilTensorToPublic(ipexTensor);
     }
 
     // Branch 2.2: Dense + CPU Tensor
@@ -496,24 +495,52 @@ std::vector<at::Tensor> shallowFallbackToCPUTensorList(const at::TensorList& ten
   return dpcpp_tensor_vec;
 }
 
-void cvtTensorToScalaraType(const at::Tensor& ipexTensor, at::ScalarType dstScalarType) {
+
+void reorderTensorToScalarTypeForDNNL(const at::Tensor& ipexTensor, at::ScalarType dstScalarType) {
+  if (ipexTensor.device().type() == at::DeviceType::CPU) {
+    return reorderTensorToScalaraType(ipexTensor, dstScalarType);
+  }
+
+  TORCH_CHECK(dstScalarType == at::kBFloat16 || dstScalarType == at::kFloat);
+  auto tensor_dtype = ipexTensor.scalar_type();
+  TORCH_CHECK(tensor_dtype == at::kBFloat16 || tensor_dtype == at::kFloat);
+  if (tensor_dtype == dstScalarType)
+    return;
+
+  if (check_tensor_own_shade_context(ipexTensor)) {
+    // Shade data context has been attached
+    if (cpu::ShadeDataContext::isDilTensor(ipexTensor)) {
+      cpu::ShadeDataContext *shade_context = (cpu::ShadeDataContext*)(ipexTensor.storage().data_ptr().get_context());
+      shade_context->dil_tensor.to_type(get_dil_data_type(dstScalarType));
+      IPEXTensorImpl* ipex_tensor_impl = (IPEXTensorImpl *)ipexTensor.unsafeGetTensorImpl();
+      ipex_tensor_impl->reset_data_type(dstScalarType);
+      ipex_tensor_impl->storage().unsafeGetStorageImpl()->set_dtype(at::scalarTypeToTypeMeta(dstScalarType));
+      return;
+    }
+  }
+
+  return reorderTensorToScalaraType(ipexTensor, dstScalarType);
+}
+
+
+void reorderTensorToScalaraType(const at::Tensor& ipexTensor, at::ScalarType dstScalarType) {
   if (!(ipexTensor.defined()))
     return;
 
   TORCH_CHECK(dstScalarType == at::kBFloat16 || dstScalarType == at::kFloat);
-  if (ipexTensor.scalar_type() == dstScalarType)
+
+  auto tensor_dtype = ipexTensor.scalar_type();
+  TORCH_CHECK(tensor_dtype == at::kBFloat16 || tensor_dtype == at::kFloat);
+  if (tensor_dtype == dstScalarType)
     return;
 
-  if (check_data_is_part_of_storage(ipexTensor))
+  if (!check_tensor_own_whole_storage(ipexTensor))
     return;
 
-  void *data_ptr = ipexTensor.unsafeGetTensorImpl()->storage().data_ptr().get();
-  void *data_ctx = ipexTensor.unsafeGetTensorImpl()->storage().data_ptr().get_context();
-  if ((data_ptr != data_ctx) && (data_ctx != nullptr)) {
+  if (check_tensor_own_shade_context(ipexTensor)) {
     // Shade data context has been attached
-    cpu::ShadeDataContext *shade_data_context = (cpu::ShadeDataContext*)data_ctx;
     if (cpu::ShadeDataContext::isDilTensor(ipexTensor)) {
-      reorderDilTensor(ipexTensor);
+      reorderDilTensorToPublic(ipexTensor);
     }
   }
@@ -528,6 +555,7 @@ void cvtTensorToScalaraType(const at::Tensor& ipexTensor, at::ScalarType dstScal
     allocator,
     /*resizeable=*/true);
 
+  void *data_ptr = ipexTensor.unsafeGetTensorImpl()->storage().data_ptr().get();
   if (dstScalarType == at::kBFloat16) {
     torch_ipex::cpu::bf16::converter::fp32_to_bf16(storage_impl->data_ptr().get(), data_ptr, nelements);
   } else {
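
Note on the fallback path above: when the descriptor-only fast path cannot be taken, the data is really converted, ending in the torch_ipex::cpu::bf16::converter::fp32_to_bf16 call. The converter itself is not part of this patch; the following standalone sketch (not ipex code) shows the kind of bit-level narrowing such a routine typically performs, assuming round-to-nearest-even rounding, which is not confirmed by the patch:

// Standalone illustration: fp32 -> bf16 narrowing with round-to-nearest-even.
// NaN handling is omitted for brevity.
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint16_t fp32_to_bf16_rne(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));               // bit-exact view of the float
  uint32_t bias = 0x7FFFu + ((bits >> 16) & 1u);      // ties round to the even mantissa
  return static_cast<uint16_t>((bits + bias) >> 16);  // keep sign, exponent, top 7 mantissa bits
}

int main() {
  const float src[] = {1.0f, 3.1415926f, -2.7182817f};
  for (float v : src)
    std::printf("%+.7f -> 0x%04x\n", v, fp32_to_bf16_rne(v));
  return 0;
}
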
diff --git a/torch_ipex/csrc/aten_ipex_bridge.h b/torch_ipex/csrc/aten_ipex_bridge.h
index cbcaf33f0..02ce9822d 100644
--- a/torch_ipex/csrc/aten_ipex_bridge.h
+++ b/torch_ipex/csrc/aten_ipex_bridge.h
@@ -16,7 +16,39 @@ std::vector<at::Tensor> fallbackToCPUTensorList(const at::TensorList&);
 std::vector<at::Tensor> shallowFallbackToCPUTensorList(const at::TensorList&);
 void attachShadeDataConext(const at::Tensor& tensor);
-void cvtTensorToScalaraType(const at::Tensor& ipexTensor, at::ScalarType dstScalarType);
+
+/**
+ * Reorder the DNNL tensor to the public format if the input tensor contains a DNNL tensor.
+ *
+ * @param[in] ipexTensor The ipex tensor whose underlying DNNL tensor will be reordered to the public format
+ */
+void reorderDilTensorToPublic(const at::Tensor& ipexTensor);
+
+/**
+ * Reorder the input tensor to the specified scalar type. This is an optimized version for
+ * DNNL OPs: if DNNL supports the current OP, you should call this API. Otherwise, you
+ * should call @sa @ref reorderTensorToScalaraType
+ *
+ * @param[in] ipexTensor The input ipex tensor to be reordered to the specified scalar type
+ * @param[in] dstScalarType The scalar type which the input ipex tensor will be reordered to. It should
+ *       be at::kBFloat16 or at::kFloat
+ *
+ * @note
+ *   If the input aten tensor is a DNNL tensor and DNNL supports the current OP, we only need
+ *   to set the data type of the DNNL tensor descriptor to the specified scalar type. This
+ *   avoids a memory copy and improves performance. We also need to reset the type meta of the
+ *   IPEXTensorImpl and StorageImpl to the type meta corresponding to the specified scalar type.
+ */
+void reorderTensorToScalarTypeForDNNL(const at::Tensor& ipexTensor, at::ScalarType dstScalarType);
+
+/**
+ * Reorder the input tensor to the specified scalar type.
+ *
+ * @param[in] ipexTensor The input ipex tensor to be reordered to the specified scalar type
+ * @param[in] dstScalarType The scalar type which the input ipex tensor will be reordered to. It should
+ *       be at::kBFloat16 or at::kFloat
+ */
+void reorderTensorToScalaraType(const at::Tensor& ipexTensor, at::ScalarType dstScalarType);
 
 // Convert CPU tensor to DPCPP tensor
 at::Tensor upgradeToDPCPPTensor(const at::Tensor& ipexTensor);
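
The split into two entry points is the core of this header change. As a reading aid only, here is a hypothetical call-site sketch: the Tensor type, the dnnl_supports_op helper, and both function bodies below are stand-ins rather than ipex code. It shows the intended dispatch, where the DNNL-aware variant may satisfy the request by resetting only the descriptor's data type while the generic variant always reorders the underlying data:

#include <iostream>

enum class ScalarType { Float, BFloat16 };
struct Tensor { ScalarType dtype; };

// Stand-ins that mimic the two APIs declared above.
void reorderTensorToScalarTypeForDNNL(Tensor& t, ScalarType dst) {
  t.dtype = dst;  // descriptor-only change, no data copy
  std::cout << "DNNL path: descriptor dtype reset\n";
}
void reorderTensorToScalaraType(Tensor& t, ScalarType dst) {
  t.dtype = dst;  // stands in for a real conversion copy
  std::cout << "generic path: data reordered\n";
}

bool dnnl_supports_op(const char* /*op_name*/) { return true; }  // assumed helper

int main() {
  Tensor input{ScalarType::Float};
  if (dnnl_supports_op("convolution"))
    reorderTensorToScalarTypeForDNNL(input, ScalarType::BFloat16);
  else
    reorderTensorToScalaraType(input, ScalarType::BFloat16);
  return 0;
}
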
diff --git a/torch_ipex/csrc/cpu/ShadeDataContext.h b/torch_ipex/csrc/cpu/ShadeDataContext.h
index 4b669dcf7..634b5df49 100644
--- a/torch_ipex/csrc/cpu/ShadeDataContext.h
+++ b/torch_ipex/csrc/cpu/ShadeDataContext.h
@@ -87,6 +87,7 @@ struct ShadeDataContext {
     void *storage_context = tensor.storage().data_ptr().get_context();
     ShadeDataContext *shade_data_context = (ShadeDataContext*)storage_context;
     auto data_type = shade_data_context->data_type;
+    TORCH_INTERNAL_ASSERT((data_type == SHADE_DATA_TYPE::CPU_RAW) || (data_type == SHADE_DATA_TYPE::DIL));
 
     if (data_type == SHADE_DATA_TYPE::DIL) {
       TORCH_WARN(tensor.is_contiguous());
diff --git a/torch_ipex/csrc/ipex_tensor_impl.cpp b/torch_ipex/csrc/ipex_tensor_impl.cpp
index f72d462f0..851693956 100644
--- a/torch_ipex/csrc/ipex_tensor_impl.cpp
+++ b/torch_ipex/csrc/ipex_tensor_impl.cpp
@@ -64,6 +64,10 @@ void IPEXTensorImpl::set_dpcpp_tensor_id() {
   this->key_set_.add(at::DispatchKey::VariableTensorId);
 }
 
+void IPEXTensorImpl::reset_data_type(at::ScalarType dst_type) {
+  this->data_type_ = at::scalarTypeToTypeMeta(dst_type);
+}
+
 void IPEXTensorImpl::copy_auto_grad(c10::TensorImpl *src_impl) {
   if (! src_impl->requires_grad()) {
     TORCH_INTERNAL_ASSERT(! this->requires_grad());
diff --git a/torch_ipex/csrc/ipex_tensor_impl.h b/torch_ipex/csrc/ipex_tensor_impl.h
index a3cd2b2fa..532493882 100644
--- a/torch_ipex/csrc/ipex_tensor_impl.h
+++ b/torch_ipex/csrc/ipex_tensor_impl.h
@@ -24,6 +24,7 @@ class IPEXTensorImpl : public c10::TensorImpl {
   void set_storage_data_ptr(c10::DataPtr);
   void set_dpcpp_tensor_id();
   void force_set_strided(at::IntArrayRef size, at::IntArrayRef stride /*, optional storage_offset_*/);
+  void reset_data_type(at::ScalarType dst_type);
 
   c10::Storage& get_storage() {
     return this->storage_;
diff --git a/torch_ipex/csrc/utils.cpp b/torch_ipex/csrc/utils.cpp
index 6535bfe26..0477f3b8e 100644
--- a/torch_ipex/csrc/utils.cpp
+++ b/torch_ipex/csrc/utils.cpp
@@ -104,7 +104,7 @@ bool check_auto_dnnl() {
   return AutoOptConfig::singleton().get_auto_dnnl();
 }
 
-bool check_data_is_part_of_storage(const at::Tensor& tensor) {
+bool check_tensor_own_whole_storage(const at::Tensor& tensor) {
   if (!(tensor.defined()))
     return false;
 
@@ -112,4 +112,16 @@
          (tensor.numel() == tensor.storage().numel());
 }
 
+bool check_tensor_own_shade_context(const at::Tensor& tensor) {
+  if (!(tensor.defined()))
+    return false;
+
+  // [NOTE]: We assume the real data of the storage is the same as its context.
+  //         Then we use this assumption to check whether the current tensor
+  //         already carries a shade data context.
+  void *data_ptr = tensor.unsafeGetTensorImpl()->storage().data_ptr().get();
+  void *data_ctx = tensor.unsafeGetTensorImpl()->storage().data_ptr().get_context();
+  return (data_ptr != data_ctx) && (data_ctx != nullptr);
+}
+
 } // namespace torch_ipex
diff --git a/torch_ipex/csrc/utils.h b/torch_ipex/csrc/utils.h
index 809ad7f5e..0e3ccfebb 100644
--- a/torch_ipex/csrc/utils.h
+++ b/torch_ipex/csrc/utils.h
@@ -18,6 +18,7 @@ bool get_device_count(c10::Device dev_type, c10::DeviceIndex *count);
 dil::data_type get_dil_data_type(at::ScalarType);
 at::ScalarType get_at_data_type(dil::data_type);
 bool check_auto_dnnl();
-bool check_data_is_part_of_storage(const at::Tensor& tensor);
+bool check_tensor_own_whole_storage(const at::Tensor& tensor);
+bool check_tensor_own_shade_context(const at::Tensor& tensor);
 
 } // namespace torch_ipex
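
Before the follow-up patches, it may help to spell out what check_tensor_own_whole_storage guards against, since patch 3 below builds on it. A tensor that starts at a nonzero storage offset, or that covers fewer elements than its storage, is an alias (a view), and reordering its storage in place would also rewrite bytes that other views still interpret under the old layout. A minimal toy model of the predicate (not ipex code; the Storage and TensorView types are stand-ins):

#include <cstddef>
#include <iostream>

struct Storage { std::size_t numel; };

struct TensorView {
  Storage* storage;
  std::size_t storage_offset;
  std::size_t numel;
};

// Mirrors the real check: zero offset and element count equal to the storage's.
bool check_tensor_own_whole_storage(const TensorView& t) {
  return t.storage != nullptr &&
         t.storage_offset == 0 &&
         t.numel == t.storage->numel;
}

int main() {
  Storage buf{16};
  TensorView whole{&buf, 0, 16};  // owns the entire storage
  TensorView slice{&buf, 4, 8};   // alias: a window into the same storage
  std::cout << check_tensor_own_whole_storage(whole) << '\n';  // 1
  std::cout << check_tensor_own_whole_storage(slice) << '\n';  // 0
  return 0;
}
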
From 1206fe2082aaf6e89e7cd0e64ee762312953fedb Mon Sep 17 00:00:00 2001
From: "Wang, Eikan"
Date: Mon, 11 May 2020 10:19:38 +0800
Subject: [PATCH 2/3] Remove redundant condition for reordering a tensor to a
 specified scalar type

---
 torch_ipex/csrc/aten_ipex_bridge.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/torch_ipex/csrc/aten_ipex_bridge.cpp b/torch_ipex/csrc/aten_ipex_bridge.cpp
index b3b4d0bc0..2caca3c40 100644
--- a/torch_ipex/csrc/aten_ipex_bridge.cpp
+++ b/torch_ipex/csrc/aten_ipex_bridge.cpp
@@ -497,10 +497,6 @@ std::vector<at::Tensor> shallowFallbackToCPUTensorList(const at::TensorList& ten
 
 
 void reorderTensorToScalarTypeForDNNL(const at::Tensor& ipexTensor, at::ScalarType dstScalarType) {
-  if (ipexTensor.device().type() == at::DeviceType::CPU) {
-    return reorderTensorToScalaraType(ipexTensor, dstScalarType);
-  }
-
   TORCH_CHECK(dstScalarType == at::kBFloat16 || dstScalarType == at::kFloat);
   auto tensor_dtype = ipexTensor.scalar_type();
   TORCH_CHECK(tensor_dtype == at::kBFloat16 || tensor_dtype == at::kFloat);

From 5be717ad8e94f8205be42532921c31b7d80ca757 Mon Sep 17 00:00:00 2001
From: "Wang, Eikan"
Date: Mon, 11 May 2020 21:18:40 +0800
Subject: [PATCH 3/3] Add an assert because reorder does not support alias
 tensors yet.

---
 torch_ipex/csrc/aten_ipex_bridge.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/torch_ipex/csrc/aten_ipex_bridge.cpp b/torch_ipex/csrc/aten_ipex_bridge.cpp
index 2caca3c40..d3039ccbd 100644
--- a/torch_ipex/csrc/aten_ipex_bridge.cpp
+++ b/torch_ipex/csrc/aten_ipex_bridge.cpp
@@ -530,8 +530,11 @@ void reorderTensorToScalaraType(const at::Tensor& ipexTensor, at::ScalarType dst
   if (tensor_dtype == dstScalarType)
     return;
 
-  if (!check_tensor_own_whole_storage(ipexTensor))
+  if (!check_tensor_own_whole_storage(ipexTensor)) {
+    // Alias tensors (views over part of a storage) are not supported yet.
+    TORCH_INTERNAL_ASSERT(false);
     return;
+  }
 
   if (check_tensor_own_shade_context(ipexTensor)) {
     // Shade data context has been attached
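
Finally, the shade-context test that patch 1 introduced in utils.cpp relies on a pointer convention: for a plain CPU allocation, the c10::DataPtr stores the data pointer itself as the deleter context, whereas ipex installs a separate ShadeDataContext object there. A toy model of that convention (not ipex code; the DataPtr and ShadeDataContext types below are simplified stand-ins):

#include <iostream>

struct ShadeDataContext { /* cpu_raw_data, dil_tensor, data_type, ... */ };

struct DataPtr {
  void* data;
  void* ctx;  // deleter context; equals `data` for plain CPU allocations
};

// Mirrors the real predicate: a distinct, non-null context marks a shade context.
bool check_tensor_own_shade_context(const DataPtr& p) {
  return (p.data != p.ctx) && (p.ctx != nullptr);
}

int main() {
  int payload = 0;
  ShadeDataContext shade;
  DataPtr plain_cpu{&payload, &payload};  // context == data: no shade context
  DataPtr shaded{&payload, &shade};       // context points at a ShadeDataContext
  std::cout << check_tensor_own_shade_context(plain_cpu) << '\n';  // 0
  std::cout << check_tensor_own_shade_context(shaded) << '\n';     // 1
  return 0;
}
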