Commit c023ec0
remove unnecessary training reorder (#553)
* remove unnecessary training reorder
* clang-format
1 parent 5973d9f commit c023ec0

File tree

  • intel_extension_for_pytorch/csrc

2 files changed, 24 insertions(+), 22 deletions(-)


intel_extension_for_pytorch/csrc/aten/cpu/Conv.cpp

Lines changed: 21 additions & 12 deletions
@@ -233,9 +233,18 @@ at::Tensor convolution_backward_input(
       "Only support 2d or 3d convolution for convolution_backward_input");
 
   const ideep::tensor mkldnn_grad_output = itensor_view_from_dense(grad_output);
-  bool is_channels_last =
-      grad_output.suggest_memory_format() == at::MemoryFormat::ChannelsLast ||
-      grad_output.suggest_memory_format() == at::MemoryFormat::ChannelsLast3d;
+  bool is_channels_last_contiguous =
+      grad_output.is_contiguous(at::MemoryFormat::ChannelsLast) ||
+      grad_output.is_contiguous(at::MemoryFormat::ChannelsLast3d);
+
+  auto memory_format = at::MemoryFormat::Contiguous;
+  if (is_channels_last_contiguous) {
+    if (input_size.size() == 4) {
+      memory_format = at::MemoryFormat::ChannelsLast;
+    } else {
+      memory_format = at::MemoryFormat::ChannelsLast3d;
+    }
+  }
 
   std::vector<int64_t> origin_weight_dims;
   origin_weight_dims.push_back(grad_output.size(1));
@@ -256,11 +265,10 @@ at::Tensor convolution_backward_input(
       {},
       ideep::attr_t());
 
-  auto grad_input = at::empty(
-      input_size,
-      grad_output.options().memory_format(grad_output.suggest_memory_format()));
+  auto grad_input =
+      at::empty(input_size, grad_output.options().memory_format(memory_format));
   ideep::tensor mkldnn_grad_input;
-  if (is_channels_last) {
+  if (is_channels_last_contiguous) {
     mkldnn_grad_input = itensor_view_from_dense(grad_input);
   }
 
@@ -275,7 +283,7 @@ at::Tensor convolution_backward_input(
       padding.vec(),
       groups);
 
-  if (is_channels_last) {
+  if (is_channels_last_contiguous) {
     return grad_input;
   } else {
     return mkldnn_to_dense(new_with_itensor_mkldnn(
@@ -302,9 +310,10 @@ std::tuple<at::Tensor, at::Tensor> convolution_backward_weights(
       "Only support 2d or 3d convolution for convolution_backward_weights");
   const ideep::tensor mkldnn_grad_output = itensor_view_from_dense(grad_output);
   const ideep::tensor mkldnn_input = itensor_view_from_dense(input);
-  bool is_channels_last =
-      grad_output.suggest_memory_format() == at::MemoryFormat::ChannelsLast ||
-      grad_output.suggest_memory_format() == at::MemoryFormat::ChannelsLast3d;
+
+  bool is_channels_last_contiguous =
+      grad_output.is_contiguous(at::MemoryFormat::ChannelsLast) ||
+      grad_output.is_contiguous(at::MemoryFormat::ChannelsLast3d);
 
   auto grad_weight = at::empty(weight_size, grad_output.options());
   at::Tensor grad_bias;
@@ -361,7 +370,7 @@ std::tuple<at::Tensor, at::Tensor> convolution_backward_weights(
   if (weight_packed) {
     return std::make_tuple(grad_weight, grad_bias);
   } else {
-    if (is_channels_last) {
+    if (is_channels_last_contiguous) {
       auto memory_format = input.dim() == 4 ? at::MemoryFormat::ChannelsLast
                                             : at::MemoryFormat::ChannelsLast3d;
       return std::make_tuple(
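
Note on the Conv.cpp change: suggest_memory_format() is a heuristic that infers a preferred layout from a tensor's strides, while is_contiguous(at::MemoryFormat::ChannelsLast) verifies that the strides actually match the channels-last contiguous layout. A minimal standalone sketch of the difference, assuming libtorch is available (the shapes here are made up for illustration, not taken from this commit):

#include <torch/torch.h>
#include <iostream>

int main() {
  // Hypothetical NCHW-contiguous gradient tensor.
  auto nchw = torch::randn({2, 3, 4, 5});
  // The same data copied into channels-last (NHWC) strides.
  auto nhwc = nchw.contiguous(at::MemoryFormat::ChannelsLast);

  // The new check: a property of the buffer itself.
  std::cout << std::boolalpha
            << nchw.is_contiguous(at::MemoryFormat::ChannelsLast) << '\n'   // false
            << nhwc.is_contiguous(at::MemoryFormat::ChannelsLast) << '\n';  // true

  // The old check: compares the heuristic's suggestion instead.
  std::cout << (nhwc.suggest_memory_format() == at::MemoryFormat::ChannelsLast)
            << '\n';  // true
  return 0;
}

With the stricter check, grad_input is allocated with a memory_format derived from the actual strides of grad_output, so the itensor_view_from_dense() view taken on it should stay consistent with the layout oneDNN writes into.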

intel_extension_for_pytorch/csrc/cpu/ideep/ideep/operators/conv.hpp

Lines changed: 3 additions & 10 deletions
@@ -697,8 +697,7 @@ struct convolution_forward
     // it will be removed after block format reorder performance improved.
     if (!weights.get_desc().is_plain() &&
         weights.get_desc() != pd.weights_desc()) {
-      auto temp = weights.to_public(nullptr, weights.get_data_type());
-      expected_weights = temp.reorder_if_differ_in(pd.weights_desc());
+      expected_weights = weights.reorder_if_differ_in(pd.weights_desc());
     } else {
       expected_weights = weights.make_grouped_weights(param.groups)
                              .reorder_if_differ_in(pd.weights_desc());
@@ -763,8 +762,7 @@ struct convolution_forward
     // it will be removed after block format reorder performance improved.
     if (!weights.get_desc().is_plain() &&
         weights.get_desc() != pd.weights_desc()) {
-      auto temp = weights.to_public(nullptr, weights.get_data_type());
-      expected_weights = temp.reorder_if_differ_in(pd.weights_desc());
+      expected_weights = weights.reorder_if_differ_in(pd.weights_desc());
     } else {
       expected_weights = weights.make_grouped_weights(param.groups)
                              .reorder_if_differ_in(pd.weights_desc());
@@ -1074,12 +1072,7 @@ struct convolution_backward_weights
     // diff_weights has been init in FW side, but has diff desc with
     // expected_diff_weights.
     if (diff_weights.get_desc() != expected_diff_weights_desc) {
-      // TODO: there has an issue when reorder block to block,
-      // will be removed after
-      // https://jira.devtools.intel.com/browse/MFDNN-5557 is fixed.
-      auto temp = expected_diff_weights.to_public(
-          nullptr, expected_diff_weights.get_data_type());
-      diff_weights.feed_from(temp);
+      diff_weights.feed_from(expected_diff_weights);
     }
   }
 };
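
Note on the conv.hpp change: the deleted path first copied the weights to a "public" (plain-layout) tensor via to_public() and reordered from there; the new path reorders directly between the two descriptors. A rough oneDNN-level sketch of that direct path, assuming the standalone dnnl C++ API (ideep wraps these primitives; the shapes and format tags below are illustrative, not from this commit):

#include <dnnl.hpp>
#include <vector>

int main() {
  using namespace dnnl;
  engine eng(engine::kind::cpu, 0);
  stream strm(eng);

  // Hypothetical 8x8x3x3 convolution weights.
  memory::dims shape = {8, 8, 3, 3};
  memory::desc plain_md(shape, memory::data_type::f32, memory::format_tag::oihw);
  memory::desc blocked_md(shape, memory::data_type::f32, memory::format_tag::OIhw8i8o);

  std::vector<float> buf(8 * 8 * 3 * 3, 1.f);
  memory src(plain_md, eng, buf.data());
  memory dst(blocked_md, eng);

  // The essence of reorder_if_differ_in(): when the descriptors differ, run
  // a single reorder primitive straight from the source layout to the target
  // layout. The removed code bounced through a plain-format copy first,
  // i.e. two passes over the weights where one suffices.
  if (plain_md != blocked_md) {
    reorder(src, dst).execute(strm, src, dst);
    strm.wait();
  }
  return 0;
}

The backward-weights hunk follows the same idea: feed_from(expected_diff_weights) now reorders block-to-block directly, and removing the workaround that the old TODO tied to oneDNN issue MFDNN-5557 implies the direct reorder is considered safe here.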
