Commit f7fbd8a

Merge pull request #14 from pinzhenx/sync_strides
Sync shape info between dil tensor and aten tensor
2 parents: 8418986 + f4732b8

File tree

5 files changed (+53, -27 lines)


scripts/cpu/gen-dense-cpu-ops.py

Lines changed: 2 additions & 1 deletion

@@ -306,7 +306,8 @@ def is_out_func(fname):
       if param_var == 'out' and is_out_func(fname):
         code += '  TORCH_INTERNAL_ASSERT({}.is_contiguous());\n'.format(param_var)
       else:
-        param_seq_str = '{}.is_contiguous() ? {} : {}.contiguous()'.format(param_var, param_var, param_var)
+        # param_seq_str = '{}.is_contiguous() ? {} : {}.contiguous()'.format(param_var, param_var, param_var)
+        None
     param_seq_str_vec.append(param_seq_str)
   code += '  if (dbl::chk::dnnl_support_the_tensors(dnnl_input_tensors))\n'
   code += '    return AtenIpexCPUDev::dil_{}({});\n'.format(fname, ', '.join(param_seq_str_vec))
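
The codegen change stops routing inputs through `.contiguous()` before the DNNL dispatch, so non-contiguous views now reach the DIL bridge with their strides intact. As a self-contained illustration of what that preserves (not IPEX code), the sketch below computes row-major strides and shows how a transpose reuses the same buffer with permuted strides, exactly the metadata a forced `.contiguous()` copy would have discarded:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Contiguous (row-major) strides: stride[i] = product of sizes[i+1..n).
std::vector<int64_t> contiguous_strides(const std::vector<int64_t>& sizes) {
  std::vector<int64_t> strides(sizes.size(), 1);
  for (int i = static_cast<int>(sizes.size()) - 2; i >= 0; --i)
    strides[i] = strides[i + 1] * sizes[i + 1];
  return strides;
}

int main() {
  std::vector<int64_t> sizes = {2, 3, 4};
  auto strides = contiguous_strides(sizes);   // {12, 4, 1}
  // A transpose of dims 0 and 1 swaps sizes and strides but copies no data:
  std::swap(sizes[0], sizes[1]);              // {3, 2, 4}
  std::swap(strides[0], strides[1]);          // {4, 12, 1} -- no longer contiguous
  for (auto s : strides) std::cout << s << ' ';
  std::cout << '\n';
}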

tests/cpu/test_torch.py

Lines changed: 2 additions & 6 deletions

@@ -12785,12 +12785,8 @@ def _test_memory_format_transformations(self, device, input_generator_fn, transf
         clone = transformation_fn(xc)

         if default_is_preserve:
-            if ipex.get_auto_dnnl():
-                self.assertTrue(clone.is_contiguous())
-                self.assertFalse(clone.is_contiguous(memory_format=memory_format))
-            else:
-                self.assertFalse(clone.is_contiguous())
-                self.assertTrue(clone.is_contiguous(memory_format=memory_format))
+            self.assertFalse(clone.is_contiguous())
+            self.assertTrue(clone.is_contiguous(memory_format=memory_format))
         else:
             self.assertTrue(clone.is_contiguous())
             self.assertFalse(clone.is_contiguous(memory_format=memory_format))
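
With the stride sync in place, the test no longer needs the `get_auto_dnnl()` special case: a preserving transformation keeps the source memory format instead of collapsing to plain contiguous. What `is_contiguous(memory_format=...)` decides can be modeled as a stride comparison; the sketch below is an illustration, not the ATen implementation, treating channels-last as contiguity under the dimension order N, H, W, C:

#include <cstdint>
#include <iostream>
#include <vector>

// True if `strides` matches the contiguous layout obtained by laying the
// dimensions out in `order` (innermost last). order = {0,1,2,3} is plain
// NCHW-contiguous; order = {0,2,3,1} is channels-last for a 4-D tensor.
bool is_contiguous_for_order(const std::vector<int64_t>& sizes,
                             const std::vector<int64_t>& strides,
                             const std::vector<int64_t>& order) {
  int64_t expected = 1;
  for (auto it = order.rbegin(); it != order.rend(); ++it) {
    if (strides[*it] != expected) return false;
    expected *= sizes[*it];
  }
  return true;
}

int main() {
  std::vector<int64_t> sizes = {2, 3, 4, 5};          // N, C, H, W
  std::vector<int64_t> cl_strides = {60, 1, 15, 3};   // channels-last layout
  std::cout << is_contiguous_for_order(sizes, cl_strides, {0, 1, 2, 3})   // 0
            << is_contiguous_for_order(sizes, cl_strides, {0, 2, 3, 1})   // 1
            << '\n';
}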

torch_ipex/csrc/cpu/dbl/Common.cpp

Lines changed: 12 additions & 6 deletions

@@ -20,7 +20,7 @@ dil::tensor dil_tensor_from_dense(const at::Tensor& tensor) {
     tensor.layout() == at::Layout::Strided,
     "dil_tensor_view_from_dense expects dense tensor input");
   at::ScalarType cur_type = tensor.scalar_type();
-  return {tensor.sizes().vec(), get_dil_data_type(cur_type), tensor.data_ptr()};
+  return {tensor.sizes().vec(), get_dil_data_type(cur_type), tensor.strides().vec(), tensor.data_ptr()};
 }

 at::Tensor dil_tensor_to_dense(const at::Tensor& tensor) {
@@ -36,9 +36,12 @@ at::Tensor dil_tensor_to_dense(const at::Tensor& tensor) {

 dil::tensor try_gen_dil_tensor(const at::Tensor &input) {
   if (cpu::ShadeDataContext::isDilTensor(input)) {
-    return cpu::ShadeDataContext::getDilTensor(input);
+    auto dil_tensor = cpu::ShadeDataContext::getDilTensor(input);
+    if (dil_tensor.is_public_format()) {
+      dil_tensor.set_dims_and_strides(input.sizes().vec(), input.strides().vec());
+    }
+    return dil_tensor;
   } else {
-    TORCH_INTERNAL_ASSERT(input.is_contiguous());
     return dil_tensor_from_dense(input);
   }
 }
@@ -60,7 +63,6 @@ at::Tensor gen_aten_tensor_by(dil::tensor dil_tensor) {
     shade_data_context,
     cpu::ShadeDataContext::freeShadeDataContext,
     at::DeviceType::DPCPP);
-  auto tensor_sizes = dil_tensor.get_dims();
   auto at_data_type = get_at_data_type(dil_tensor.get_data_type());
   auto storage_impl = c10::make_intrusive<at::StorageImpl>(
     at::scalarTypeToTypeMeta(at_data_type),
@@ -69,10 +71,14 @@ at::Tensor gen_aten_tensor_by(dil::tensor dil_tensor) {
     nullptr,
     /*resizeable=*/false);
   auto _tensor = at::detail::make_tensor<torch_ipex::IPEXTensorImpl>(storage_impl, at::DispatchKey::DPCPPTensorId);
-  if (tensor_sizes.size() != 1 || tensor_sizes[0] != 0) {
+  if (dil_tensor.is_public_format()) {
+    dbl::comm::sync_shape_from_dil_to_aten(_tensor, dil_tensor);
+  } else {
+    // Block format does not include stride information
+    auto tensor_sizes = dil_tensor.get_dims();
+    TORCH_INTERNAL_ASSERT(tensor_sizes.size() != 1 || tensor_sizes[0] != 0);
     _tensor.unsafeGetTensorImpl()->set_sizes_contiguous(tensor_sizes);
   }
-  TORCH_INTERNAL_ASSERT(_tensor.is_contiguous());
   TORCH_INTERNAL_ASSERT(_tensor.layout() == c10::kStrided);
   return _tensor;
 }
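
`try_gen_dil_tensor` now refreshes a public-format DIL tensor's descriptor from the ATen view before handing it to DNNL, and `gen_aten_tensor_by` pushes shape info the other way via `sync_shape_from_dil_to_aten`; blocked layouts keep their opaque descriptors and are only given contiguous sizes. A minimal model of the ATen-to-DIL direction, using hypothetical `AtenMeta`/`DilMeta` stand-ins rather than the real `at::Tensor`/`dil::tensor` types, looks like this:

#include <cstdint>
#include <iostream>
#include <vector>

// Minimal stand-ins for the two metadata views involved (hypothetical types,
// not the real dil::tensor / at::Tensor API).
struct AtenMeta { std::vector<int64_t> sizes, strides; };
struct DilMeta  { std::vector<int64_t> dims, strides; bool public_format; };

// Mirrors the direction of try_gen_dil_tensor: a plain-format DIL tensor
// adopts the ATen view's sizes and strides; a blocked tensor keeps its own
// opaque layout and must be converted (to_public) before strides make sense.
void sync_aten_to_dil(const AtenMeta& at, DilMeta& dil) {
  if (dil.public_format) {
    dil.dims = at.sizes;
    dil.strides = at.strides;
  }
  // else: blocked layout -- leave the descriptor untouched.
}

int main() {
  AtenMeta at{{3, 2, 4}, {4, 12, 1}};       // a transposed (non-contiguous) view
  DilMeta dil{{2, 3, 4}, {12, 4, 1}, true};
  sync_aten_to_dil(at, dil);
  std::cout << dil.strides[0] << '\n';      // 4 -- view metadata propagated
}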

torch_ipex/csrc/cpu/dbl/DNNLChecker.cpp

Lines changed: 1 addition & 1 deletion

@@ -56,7 +56,7 @@ bool dnnl_support_the_dimension_of(const std::vector<at::Tensor> &tensor_vec) {

 bool dnnl_tensor_has_data(const std::vector<at::Tensor> &tensor_vec) {
   for (auto it = tensor_vec.begin(); it != tensor_vec.end(); ++it)
-    if (it->data_ptr() == nullptr)
+    if (it->numel() == 0)
      return false;

   return true;
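
Testing `numel() == 0` instead of `data_ptr() == nullptr` matters because an empty tensor (one with any zero-sized dimension) can still be backed by a non-null allocation, so the pointer test lets tensors with nothing to compute slip onto the DNNL path. A small stand-alone sketch of the element count as a product of sizes:

#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

// numel is the product of all dimension sizes; any zero-sized dimension
// makes the whole tensor empty even though a buffer pointer may be non-null.
int64_t numel(const std::vector<int64_t>& sizes) {
  return std::accumulate(sizes.begin(), sizes.end(), int64_t{1},
                         std::multiplies<int64_t>());
}

int main() {
  std::cout << numel({2, 0, 4}) << ' '    // 0: empty, but storage may exist
            << numel({2, 3, 4}) << '\n';  // 24
}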

torch_ipex/csrc/cpu/dil/dil/tensor.hpp

Lines changed: 36 additions & 13 deletions

@@ -81,18 +81,6 @@ class tensor : public memory {
     return static_cast<data_type>(data.data_type);
   }

-  inline dims get_strides() const {
-    DIL_ENFORCE(is_plain(), "Call to_public() before get_strides()");
-    const auto& strides = blocking_strides();
-    if (!is_grouped()) {
-      return dims(strides, strides + data.ndims);
-    } else {
-      auto ret = dims(strides + 1, strides + data.ndims);
-      ret[0] = std::min(strides[0], strides[1]);
-      return ret;
-    }
-  }
-
   /** returns true if memory descriptor is zero */
   bool is_zero() const { return data.ndims == 0; }

@@ -379,6 +367,17 @@ class tensor : public memory {
     return const_cast<dnnl_memory_desc_t&>(data).format_desc.blocking.strides;
   }

+  inline dims get_strides() const {
+    const auto& strides = blocking_strides();
+    if (!is_grouped()) {
+      return dims(strides, strides + data.ndims);
+    } else {
+      auto ret = dims(strides + 1, strides + data.ndims);
+      ret[0] = std::min(strides[0], strides[1]);
+      return ret;
+    }
+  }
+
   void set_g(dim groups) {
     auto reserved_size = sizeof(((dnnl_memory_extra_desc_t *)0)->reserved);
     auto offset = reserved_size / sizeof(dim) - 1;
@@ -449,6 +448,12 @@ class tensor : public memory {
     init(adims, adata_type, ahandle, aengine);
   }

+  // no format_tag; strides, buffer
+  tensor(const dims &adims, data_type adata_type, const dims &astrides,
+         void *ahandle, const engine &aengine = engine::cpu_engine()) {
+    init(adims, adata_type, astrides, ahandle, aengine);
+  }
+
   // no format_tag, no buffer
   tensor(const dims &adims, data_type adata_type,
          const engine &aengine = engine::cpu_engine()) {
@@ -480,6 +485,11 @@ class tensor : public memory {
     init({adims, adata_type, aformat_tag}, ahandle, aengine);
   }

+  void init(const dims &adims, data_type adata_type, const dims &astrides,
+            void *ahandle, const engine &aengine = engine::cpu_engine()) {
+    init({adims, adata_type, astrides}, ahandle, aengine);
+  }
+
   // format_tag, no buffer
   void init(const dims &adims, data_type adata_type, format_tag aformat_tag,
             const engine &aengine = engine::cpu_engine()) {
@@ -571,7 +581,20 @@ class tensor : public memory {
   /// Returns dimension vector
   inline dims get_dims() const { return get_desc().get_dims(); }

-  inline dims get_strides() const { return get_desc().get_strides(); }
+  inline dims get_strides() const {
+    DIL_ENFORCE(is_public_format(), "Call to_public() before get_strides()");
+    return get_desc().get_strides();
+  }
+
+  inline void set_dims_and_strides(const dims &adims, const dims &astrides) {
+    DIL_ENFORCE(is_public_format(), "Call to_public() before set_dims_and_strides()");
+    DIL_ENFORCE(adims.size() == astrides.size(), "Dims and strides must have the same size");
+    if (get_dims() == adims && get_strides() == astrides)
+      return;
+    auto new_desc = desc(adims, get_data_type(), astrides);
+    DIL_ENFORCE(get_size() == new_desc.get_size(), "Invalid dims and strides for the original desc");
+    set_desc(new_desc);
+  }

   /// Return element number of the param.
   /// The number is the meaning values for a tensor, instead of whole buffer.
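
`set_dims_and_strides` only swaps in a new plain descriptor when the replacement covers exactly the same buffer, which is what the `get_size()` comparison enforces. The sketch below models that footprint computation for plain layouts (a simplification of what a memory descriptor's size query reports, not the DNNL implementation): a transposed view passes the check, while growing a dimension would not:

#include <cstdint>
#include <iostream>
#include <vector>

// Bytes needed to back a strided view: offset of the last element plus one,
// times the element size (a simplified model of get_size() for plain layouts).
int64_t required_bytes(const std::vector<int64_t>& dims,
                       const std::vector<int64_t>& strides,
                       int64_t elem_size) {
  int64_t last = 0;
  for (size_t i = 0; i < dims.size(); ++i)
    last += (dims[i] - 1) * strides[i];
  return (last + 1) * elem_size;
}

int main() {
  // {2,3,4} contiguous and its {3,2,4} transpose cover the same 96 bytes,
  // so swapping the descriptor's dims/strides is safe; growing a dim is not.
  std::cout << required_bytes({2, 3, 4}, {12, 4, 1}, 4) << ' '    // 96
            << required_bytes({3, 2, 4}, {4, 12, 1}, 4) << ' '    // 96
            << required_bytes({4, 3, 4}, {12, 4, 1}, 4) << '\n';  // 192
}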
