diff --git a/scripts/cpu/gen-dense-cpu-ops.py b/scripts/cpu/gen-dense-cpu-ops.py index 9042689aa..7a8b7253b 100755 --- a/scripts/cpu/gen-dense-cpu-ops.py +++ b/scripts/cpu/gen-dense-cpu-ops.py @@ -306,7 +306,8 @@ def is_out_func(fname): if param_var == 'out' and is_out_func(fname): code += ' TORCH_INTERNAL_ASSERT({}.is_contiguous());\n'.format(param_var) else: - param_seq_str = '{}.is_contiguous() ? {} : {}.contiguous()'.format(param_var, param_var, param_var) + # param_seq_str = '{}.is_contiguous() ? {} : {}.contiguous()'.format(param_var, param_var, param_var) + param_seq_str = param_var param_seq_str_vec.append(param_seq_str) code += ' if (dbl::chk::dnnl_support_the_tensors(dnnl_input_tensors))\n' code += ' return AtenIpexCPUDev::dil_{}({});\n'.format(fname, ', '.join(param_seq_str_vec)) diff --git a/tests/cpu/test_torch.py b/tests/cpu/test_torch.py index c5fbab570..219a5c48f 100644 --- a/tests/cpu/test_torch.py +++ b/tests/cpu/test_torch.py @@ -12785,12 +12785,8 @@ def _test_memory_format_transformations(self, device, input_generator_fn, transf clone = transformation_fn(xc) if default_is_preserve: - if ipex.get_auto_dnnl(): - self.assertTrue(clone.is_contiguous()) - self.assertFalse(clone.is_contiguous(memory_format=memory_format)) - else: - self.assertFalse(clone.is_contiguous()) - self.assertTrue(clone.is_contiguous(memory_format=memory_format)) + self.assertFalse(clone.is_contiguous()) + self.assertTrue(clone.is_contiguous(memory_format=memory_format)) else: self.assertTrue(clone.is_contiguous()) self.assertFalse(clone.is_contiguous(memory_format=memory_format)) diff --git a/torch_ipex/csrc/cpu/dbl/Common.cpp b/torch_ipex/csrc/cpu/dbl/Common.cpp index 3ce82914a..c27d941d8 100644 --- a/torch_ipex/csrc/cpu/dbl/Common.cpp +++ b/torch_ipex/csrc/cpu/dbl/Common.cpp @@ -20,7 +20,7 @@ dil::tensor dil_tensor_from_dense(const at::Tensor& tensor) { tensor.layout() == at::Layout::Strided, "dil_tensor_view_from_dense expects dense tensor input"); at::ScalarType cur_type = 
tensor.scalar_type(); - return {tensor.sizes().vec(), get_dil_data_type(cur_type), tensor.data_ptr()}; + return {tensor.sizes().vec(), get_dil_data_type(cur_type), tensor.strides().vec(), tensor.data_ptr()}; } at::Tensor dil_tensor_to_dense(const at::Tensor& tensor) { @@ -36,9 +36,12 @@ at::Tensor dil_tensor_to_dense(const at::Tensor& tensor) { dil::tensor try_gen_dil_tensor(const at::Tensor &input) { if (cpu::ShadeDataContext::isDilTensor(input)) { - return cpu::ShadeDataContext::getDilTensor(input); + auto dil_tensor = cpu::ShadeDataContext::getDilTensor(input); + if (dil_tensor.is_public_format()) { + dil_tensor.set_dims_and_strides(input.sizes().vec(), input.strides().vec()); + } + return dil_tensor; } else { - TORCH_INTERNAL_ASSERT(input.is_contiguous()); return dil_tensor_from_dense(input); } } @@ -60,7 +63,6 @@ at::Tensor gen_aten_tensor_by(dil::tensor dil_tensor) { shade_data_context, cpu::ShadeDataContext::freeShadeDataContext, at::DeviceType::DPCPP); - auto tensor_sizes = dil_tensor.get_dims(); auto at_data_type = get_at_data_type(dil_tensor.get_data_type()); auto storage_impl = c10::make_intrusive( at::scalarTypeToTypeMeta(at_data_type), @@ -69,10 +71,14 @@ at::Tensor gen_aten_tensor_by(dil::tensor dil_tensor) { nullptr, /*resizeable=*/false); auto _tensor = at::detail::make_tensor(storage_impl, at::DispatchKey::DPCPPTensorId); - if (tensor_sizes.size() != 1 || tensor_sizes[0] != 0) { + if (dil_tensor.is_public_format()) { + dbl::comm::sync_shape_from_dil_to_aten(_tensor, dil_tensor); + } else { + // Block format does not include stride information + auto tensor_sizes = dil_tensor.get_dims(); + TORCH_INTERNAL_ASSERT(tensor_sizes.size() != 1 || tensor_sizes[0] != 0); _tensor.unsafeGetTensorImpl()->set_sizes_contiguous(tensor_sizes); } - TORCH_INTERNAL_ASSERT(_tensor.is_contiguous()); TORCH_INTERNAL_ASSERT(_tensor.layout() == c10::kStrided); return _tensor; } diff --git a/torch_ipex/csrc/cpu/dbl/DNNLChecker.cpp b/torch_ipex/csrc/cpu/dbl/DNNLChecker.cpp 
index 7afef50f9..b49f346e8 100644 --- a/torch_ipex/csrc/cpu/dbl/DNNLChecker.cpp +++ b/torch_ipex/csrc/cpu/dbl/DNNLChecker.cpp @@ -56,7 +56,7 @@ bool dnnl_support_the_dimension_of(const std::vector &tensor_vec) { bool dnnl_tensor_has_data(const std::vector &tensor_vec) { for (auto it = tensor_vec.begin(); it != tensor_vec.end(); ++it) - if (it->data_ptr() == nullptr) + if (it->numel() == 0) return false; return true; diff --git a/torch_ipex/csrc/cpu/dil/dil/tensor.hpp b/torch_ipex/csrc/cpu/dil/dil/tensor.hpp index f28107fc6..120e00c65 100644 --- a/torch_ipex/csrc/cpu/dil/dil/tensor.hpp +++ b/torch_ipex/csrc/cpu/dil/dil/tensor.hpp @@ -81,18 +81,6 @@ class tensor : public memory { return static_cast(data.data_type); } - inline dims get_strides() const { - DIL_ENFORCE(is_plain(), "Call to_public() before get_strides()"); - const auto& strides = blocking_strides(); - if (!is_grouped()) { - return dims(strides, strides + data.ndims); - } else { - auto ret = dims(strides + 1, strides + data.ndims); - ret[0] = std::min(strides[0], strides[1]); - return ret; - } - } - /** returns true if memory descriptor is zero */ bool is_zero() const { return data.ndims == 0; } @@ -379,6 +367,17 @@ class tensor : public memory { return const_cast(data).format_desc.blocking.strides; } + inline dims get_strides() const { + const auto& strides = blocking_strides(); + if (!is_grouped()) { + return dims(strides, strides + data.ndims); + } else { + auto ret = dims(strides + 1, strides + data.ndims); + ret[0] = std::min(strides[0], strides[1]); + return ret; + } + } + void set_g(dim groups) { auto reserved_size = sizeof(((dnnl_memory_extra_desc_t *)0)->reserved); auto offset = reserved_size / sizeof(dim) - 1; @@ -449,6 +448,12 @@ class tensor : public memory { init(adims, adata_type, ahandle, aengine); } + // no format_tag, strides, buffer + tensor(const dims &adims, data_type adata_type, const dims &astrides, + void *ahandle, const engine &aengine = engine::cpu_engine()) { + init(adims, 
adata_type, astrides, ahandle, aengine); + } + // no format_tag, no buffer tensor(const dims &adims, data_type adata_type, const engine &aengine = engine::cpu_engine()) { @@ -480,6 +485,11 @@ class tensor : public memory { init({adims, adata_type, aformat_tag}, ahandle, aengine); } + void init(const dims &adims, data_type adata_type, const dims &astrides, + void *ahandle, const engine &aengine = engine::cpu_engine()) { + init({adims, adata_type, astrides}, ahandle, aengine); + } + // format_tag, no buffer void init(const dims &adims, data_type adata_type, format_tag aformat_tag, const engine &aengine = engine::cpu_engine()) { @@ -571,7 +581,20 @@ class tensor : public memory { /// Returns dimension vector inline dims get_dims() const { return get_desc().get_dims(); } - inline dims get_strides() const { return get_desc().get_strides(); } + inline dims get_strides() const { + DIL_ENFORCE(is_public_format(), "Call to_public() before get_strides()"); + return get_desc().get_strides(); + } + + inline void set_dims_and_strides(const dims &adims, const dims &astrides) { + DIL_ENFORCE(is_public_format(), "Call to_public() before set_dims_and_strides()"); + DIL_ENFORCE(adims.size() == astrides.size(), "Dims and strides must have the same size"); + if (get_dims() == adims && get_strides() == astrides) + return; + auto new_desc = desc(adims, get_data_type(), astrides); + DIL_ENFORCE(get_size() == new_desc.get_size(), "Invalid dims and strides for the original desc"); + set_desc(new_desc); + } /// Return element number of the param. /// The number is the meaning values for a tensor, instead of whole buffer.