diff --git a/cmake/CPU.cmake b/cmake/CPU.cmake index 08a246f35..8ae17fb3e 100644 --- a/cmake/CPU.cmake +++ b/cmake/CPU.cmake @@ -27,7 +27,7 @@ IF(CMAKE_BUILD_TYPE MATCHES Debug) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -D_DEBUG") ELSE() message("Release build.") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -DNDEBUG") ENDIF() # ---[ Build flags diff --git a/scripts/cpu/gen-dense-cpu-ops.py b/scripts/cpu/gen-dense-cpu-ops.py index 9784e8910..4096416fc 100755 --- a/scripts/cpu/gen-dense-cpu-ops.py +++ b/scripts/cpu/gen-dense-cpu-ops.py @@ -304,7 +304,7 @@ def is_out_func(fname): param_seq_str = param_var if param_var in dnnl_tensor_param_vars: if param_var == 'out' and is_out_func(fname): - code += ' TORCH_INTERNAL_ASSERT({}.is_contiguous());\n'.format(param_var) + code += ' TORCH_INTERNAL_ASSERT_DEBUG_ONLY({}.is_contiguous());\n'.format(param_var) else: param_seq_str = '{}.is_contiguous() ? {} : {}.contiguous()'.format(param_var, param_var, param_var) param_seq_str_vec.append(param_seq_str) @@ -334,10 +334,10 @@ def gen_fallback_prepare_code(self, cpp_sig): ipex_name = '_ipex_{}'.format(param.name) param.ipex_name = ipex_name check_cond = '{}.device().type() == at::DeviceType::DPCPP'.format(param.name) - op_check_code += ' TORCH_INTERNAL_ASSERT({});\n'.format(check_cond) + op_check_code += ' TORCH_INTERNAL_ASSERT_DEBUG_ONLY({});\n'.format(check_cond) code += ' at::TensorOptions {} = {}.device(at::DeviceType::CPU);\n'.format(ipex_name, param.name) elif param.core_type == 'Storage': - code += ' TORCH_INTERNAL_ASSERT({}.device_type() == c10::DeviceType::DPCPP);\n'.format(param.name) + code += ' TORCH_INTERNAL_ASSERT_DEBUG_ONLY({}.device_type() == c10::DeviceType::DPCPP);\n'.format(param.name) elif param.core_type == 'MemoryFormat': if param.is_optional: check_cond = '{}.value_or(c10::MemoryFormat::Contiguous) != c10::MemoryFormat::Contiguous'.format(param.name) @@ -352,7 +352,7 @@ def gen_fallback_prepare_code(self, cpp_sig): assert param.core_type == 'Tensor' ipex_name = '_ipex_{}'.format(param.name) check_cond = '{}.layout() == c10::kStrided'.format(param.name) - op_check_code += ' TORCH_INTERNAL_ASSERT({});\n'.format(check_cond) + op_check_code += ' TORCH_INTERNAL_ASSERT_DEBUG_ONLY({});\n'.format(check_cond) code += ' auto&& {} = bridge::{}({});\n'.format(ipex_name, _SHALLOW_FALLBACK_TO_CPU_TENSOR, param.name) param.ipex_name = ipex_name return op_check_code + code diff --git a/scripts/cpu/gen-sparse-cpu-ops.py b/scripts/cpu/gen-sparse-cpu-ops.py index bd4a014f7..3f99c9e18 100755 --- a/scripts/cpu/gen-sparse-cpu-ops.py +++ b/scripts/cpu/gen-sparse-cpu-ops.py @@ -260,10 +260,10 @@ def gen_fallback_prepare_code(self, cpp_sig): ipex_name = '_ipex_{}'.format(param.name) param.ipex_name = ipex_name check_cond = '{}.device().type() == at::DeviceType::DPCPP'.format(param.name) - op_check_code += ' TORCH_INTERNAL_ASSERT({});\n'.format(check_cond) + op_check_code += ' TORCH_INTERNAL_ASSERT_DEBUG_ONLY({});\n'.format(check_cond) code += ' at::TensorOptions {} = {}.device(at::DeviceType::CPU);\n'.format(ipex_name, param.name) elif param.core_type == 'Storage': - code += ' TORCH_INTERNAL_ASSERT({}.device_type() == c10::DeviceType::DPCPP);\n'.format(param.name) + code += ' TORCH_INTERNAL_ASSERT_DEBUG_ONLY({}.device_type() == c10::DeviceType::DPCPP);\n'.format(param.name) elif param.core_type == 'MemoryFormat': None elif param.core_type != 'Tensor': diff --git a/torch_ipex/csrc/aten_ipex_bridge.cpp b/torch_ipex/csrc/aten_ipex_bridge.cpp index 
62b187c98..266955201 100644 --- a/torch_ipex/csrc/aten_ipex_bridge.cpp +++ b/torch_ipex/csrc/aten_ipex_bridge.cpp @@ -21,26 +21,26 @@ namespace bridge { #if defined(_DEBUG) #define CHECK_TENSOR(a, b) \ - TORCH_INTERNAL_ASSERT(a.numel() == b.numel()); \ - TORCH_INTERNAL_ASSERT(a.dtype() == b.dtype()); \ - TORCH_INTERNAL_ASSERT(a.unsafeGetTensorImpl()->sizes() == b.unsafeGetTensorImpl()->sizes()); \ - TORCH_INTERNAL_ASSERT(a.unsafeGetTensorImpl()->dtype() == b.unsafeGetTensorImpl()->dtype()); \ - TORCH_INTERNAL_ASSERT(a.unsafeGetTensorImpl()->is_contiguous() == b.unsafeGetTensorImpl()->is_contiguous()); \ - TORCH_INTERNAL_ASSERT(a.unsafeGetTensorImpl()->is_contiguous(at::MemoryFormat::ChannelsLast) == b.unsafeGetTensorImpl()->is_contiguous(at::MemoryFormat::ChannelsLast)); \ - TORCH_INTERNAL_ASSERT(a.unsafeGetTensorImpl()->is_strides_like_channels_last() == b.unsafeGetTensorImpl()->is_strides_like_channels_last()); \ - TORCH_INTERNAL_ASSERT(a.unsafeGetTensorImpl()->is_non_overlapping_and_dense() == b.unsafeGetTensorImpl()->is_non_overlapping_and_dense()); \ - TORCH_INTERNAL_ASSERT(a.unsafeGetTensorImpl()->is_wrapped_number() == b.unsafeGetTensorImpl()->is_wrapped_number()); \ - TORCH_INTERNAL_ASSERT(a.unsafeGetTensorImpl()->version_counter().current_version() == b.unsafeGetTensorImpl()->version_counter().current_version()); \ - TORCH_INTERNAL_ASSERT(a.unsafeGetTensorImpl()->allow_tensor_metadata_change() == b.unsafeGetTensorImpl()->allow_tensor_metadata_change()) + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.numel() == b.numel()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.dtype() == b.dtype()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->sizes() == b.unsafeGetTensorImpl()->sizes()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->dtype() == b.unsafeGetTensorImpl()->dtype()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->is_contiguous() == b.unsafeGetTensorImpl()->is_contiguous()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->is_contiguous(at::MemoryFormat::ChannelsLast) == b.unsafeGetTensorImpl()->is_contiguous(at::MemoryFormat::ChannelsLast)); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->is_strides_like_channels_last() == b.unsafeGetTensorImpl()->is_strides_like_channels_last()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->is_non_overlapping_and_dense() == b.unsafeGetTensorImpl()->is_non_overlapping_and_dense()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->is_wrapped_number() == b.unsafeGetTensorImpl()->is_wrapped_number()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->version_counter().current_version() == b.unsafeGetTensorImpl()->version_counter().current_version()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->allow_tensor_metadata_change() == b.unsafeGetTensorImpl()->allow_tensor_metadata_change()) #else #define CHECK_TENSOR(a, b) ((void) 0) #endif #if defined(_DEBUG) #define CHECK_TENSOR_CRITICAL(a, b, may_alias) \ - TORCH_INTERNAL_ASSERT(!may_alias || a.data_ptr() == b.data_ptr()); \ - TORCH_INTERNAL_ASSERT(a.unsafeGetTensorImpl()->strides() == b.unsafeGetTensorImpl()->strides()); \ - TORCH_INTERNAL_ASSERT(a.unsafeGetTensorImpl()->storage_offset() == b.unsafeGetTensorImpl()->storage_offset()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!may_alias || a.data_ptr() == b.data_ptr()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->strides() == b.unsafeGetTensorImpl()->strides()); \ + 
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->storage_offset() == b.unsafeGetTensorImpl()->storage_offset()); \ CHECK_TENSOR(a, b) #else #define CHECK_TENSOR_CRITICAL(a, b, may_alias) ((void) 0) @@ -48,12 +48,12 @@ namespace bridge { #if defined(_DEBUG) #define CHECK_SPARSE_TENSOR_CRITICAL(a, b, may_alias) \ - TORCH_INTERNAL_ASSERT(!may_alias || a._indices().data_ptr() == b._indices().data_ptr()); \ - TORCH_INTERNAL_ASSERT(!may_alias || a._values().data_ptr() == b._values().data_ptr()); \ - TORCH_INTERNAL_ASSERT(a.sparse_dim() == b.sparse_dim()); \ - TORCH_INTERNAL_ASSERT(a.dense_dim() == b.dense_dim()); \ - TORCH_INTERNAL_ASSERT(a._nnz() == b._nnz()); \ - TORCH_INTERNAL_ASSERT(a.is_coalesced() == b.is_coalesced()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!may_alias || a._indices().data_ptr() == b._indices().data_ptr()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!may_alias || a._values().data_ptr() == b._values().data_ptr()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.sparse_dim() == b.sparse_dim()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.dense_dim() == b.dense_dim()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a._nnz() == b._nnz()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.is_coalesced() == b.is_coalesced()); \ CHECK_TENSOR(a._indices(), b._indices()); \ CHECK_TENSOR(a._values(), b._values()) #else @@ -66,21 +66,21 @@ void reorderDilTensorToPublic(const at::Tensor& ipexTensor) { void *data_ctx = ipexTensor.unsafeGetTensorImpl()->storage().data_ptr().get_context(); cpu::ShadeDataContext *shade_data_context = (cpu::ShadeDataContext*)data_ctx; #if defined(_DEBUG) - TORCH_INTERNAL_ASSERT(! (shade_data_context->dil_tensor.is_empty())); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(! (shade_data_context->dil_tensor.is_empty())); #endif dil::tensor &dil_tensor = shade_data_context->dil_tensor; if (dil_tensor.is_public_format()) { #if defined(_DEBUG) - TORCH_INTERNAL_ASSERT(shade_data_context->cpu_raw_data == shade_data_context->dil_tensor.get_data_handle()); - TORCH_INTERNAL_ASSERT(shade_data_context->cpu_raw_data != nullptr); - TORCH_INTERNAL_ASSERT(shade_data_context->cpu_del_fun != nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(shade_data_context->cpu_raw_data == shade_data_context->dil_tensor.get_data_handle()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(shade_data_context->cpu_raw_data != nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(shade_data_context->cpu_del_fun != nullptr); #endif } else { #if defined(_DEBUG) auto& data_ptr = ipexTensor.storage().unsafeGetStorageImpl()->data_ptr(); - TORCH_INTERNAL_ASSERT(data_ptr.get_deleter() == &(cpu::ShadeDataContext::freeShadeDataContext)); - TORCH_INTERNAL_ASSERT(shade_data_context->cpu_del_fun == nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(data_ptr.get_deleter() == &(cpu::ShadeDataContext::freeShadeDataContext)); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(shade_data_context->cpu_del_fun == nullptr); #endif auto pub_tensor = dil_tensor.to_public(nullptr, dil_tensor.get_data_type()); @@ -116,7 +116,7 @@ void attachShadeDataConext(const at::Tensor& tensor) { auto cur_del_fn = data_ptr.get_deleter(); bool res = data_ptr.compare_exchange_deleter(cur_del_fn, &(c10::detail::deleteNothing)); - TORCH_INTERNAL_ASSERT(res); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(res); // Make sure that does not triger free resource for set_ptr cpu::ShadeDataContext *shade_data_context = cpu::ShadeDataContext::allocShadeDataContext(); shade_data_context->cpu_raw_data = data_ptr.get(); @@ -131,44 +131,6 @@ void attachShadeDataConext(const at::Tensor& tensor) { } -// Fallback DPCPP tensor to CPU Tensor. 
-// It will allocate new memory buffer and then duplicate the DPCPP tensor buffer to create new CPU Tensor -at::Tensor fallbackToCPUTensor(const at::Tensor& ipexTensor) { - TORCH_INTERNAL_ASSERT(ipexTensor.defined()); - TORCH_INTERNAL_ASSERT(!ipexTensor.is_sparse()); - TORCH_INTERNAL_ASSERT(ipexTensor.is_contiguous()); - TORCH_INTERNAL_ASSERT(ipexTensor.layout() == c10::kStrided); - TORCH_INTERNAL_ASSERT(ipexTensor.device().type() == at::DeviceType::DPCPP); - if (ipexTensor.device().is_cpu()) - return ipexTensor; - - if (ipexTensor.device().type() != at::DeviceType::DPCPP) { - assert(false); - } - - auto* allocator = c10::GetAllocator(c10::DeviceType::CPU); - int64_t nelements = ipexTensor.numel(); - auto dtype = ipexTensor.dtype(); - int64_t data_size = nelements * dtype.itemsize(); - auto storage_impl = c10::make_intrusive( - dtype, - nelements, - allocator->allocate(data_size), - allocator, - /*resizeable=*/true); - memcpy(storage_impl->data(), ipexTensor.unsafeGetTensorImpl()->data(), data_size); - - auto _tensor = at::detail::make_tensor(storage_impl, at::DispatchKey::CPUTensorId); - IPEXTensorImpl::CopyMetadata(_tensor.unsafeGetTensorImpl(), ipexTensor.unsafeGetTensorImpl()); - auto _tensor_sizes = ipexTensor.sizes(); - if (_tensor_sizes.size() != 1 || _tensor_sizes[0] != 0) { - _tensor.unsafeGetTensorImpl()->set_sizes_contiguous(_tensor_sizes); - } - CHECK_TENSOR(_tensor, ipexTensor); - return _tensor; -} - - // Unpack CPU tensor from ipex tensor and return to caller directly //at::Tensor shallowFallbackToCPUShadeTensor(const at::Tensor& ipexTensor) { at::Tensor shallowFallbackToCPUTensor(const at::Tensor& ipexTensor) { @@ -177,13 +139,13 @@ at::Tensor shallowFallbackToCPUTensor(const at::Tensor& ipexTensor) { } if (ipexTensor.device().is_cpu()) { - TORCH_INTERNAL_ASSERT(! (ipexTensor.key_set().has(at::DispatchKey::DPCPPTensorId))); - TORCH_INTERNAL_ASSERT(! (ipexTensor.key_set().has(at::DispatchKey::SparseDPCPPTensorId))); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(! (ipexTensor.key_set().has(at::DispatchKey::DPCPPTensorId))); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(! (ipexTensor.key_set().has(at::DispatchKey::SparseDPCPPTensorId))); return ipexTensor; } - TORCH_INTERNAL_ASSERT(ipexTensor.device().is_dpcpp()); - TORCH_INTERNAL_ASSERT( + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipexTensor.device().is_dpcpp()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( ipexTensor.key_set().has(at::DispatchKey::DPCPPTensorId) || ipexTensor.key_set().has(at::DispatchKey::SparseDPCPPTensorId)); @@ -204,7 +166,7 @@ at::Tensor shallowFallbackToCPUTensor(const at::Tensor& ipexTensor) { return shallowFallbackToCPUTensorImpl(ipexTensor); } - TORCH_INTERNAL_ASSERT(data_ctx != nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(data_ctx != nullptr); cpu::ShadeDataContext *shade_data_context = (cpu::ShadeDataContext*)data_ctx; // Branch 2.1: Dense + Dil Tensor if (cpu::ShadeDataContext::isDilTensor(ipexTensor)) { @@ -224,16 +186,16 @@ at::Tensor shallowFallbackToCPUTensorImpl(const at::Tensor& ipexTensor) { } if (ipexTensor.device().is_cpu()) { - TORCH_INTERNAL_ASSERT(! (ipexTensor.key_set().has(at::DispatchKey::DPCPPTensorId))); - TORCH_INTERNAL_ASSERT(! (ipexTensor.key_set().has(at::DispatchKey::SparseDPCPPTensorId))); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(! (ipexTensor.key_set().has(at::DispatchKey::DPCPPTensorId))); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(! 
(ipexTensor.key_set().has(at::DispatchKey::SparseDPCPPTensorId))); return ipexTensor; } if (ipexTensor.is_sparse()) { - TORCH_INTERNAL_ASSERT(ipexTensor.layout() == c10::kSparse); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipexTensor.layout() == c10::kSparse); // [NOTE]: Use _indices and _values interfaces to bypass non-coalesced check - TORCH_INTERNAL_ASSERT(ipexTensor._indices().layout() == c10::kStrided); - TORCH_INTERNAL_ASSERT(ipexTensor._values().layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipexTensor._indices().layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipexTensor._values().layout() == c10::kStrided); auto&& cpu_indices = shallowFallbackToCPUTensorImpl(ipexTensor._indices()); auto&& cpu_values = shallowFallbackToCPUTensorImpl(ipexTensor._values()); @@ -249,9 +211,9 @@ at::Tensor shallowFallbackToCPUTensorImpl(const at::Tensor& ipexTensor) { return _tensor; } else { auto *ipex_tensor_impl = ipexTensor.unsafeGetTensorImpl(); - TORCH_INTERNAL_ASSERT(ipex_tensor_impl != nullptr); - TORCH_INTERNAL_ASSERT(ipex_tensor_impl->has_storage()); - TORCH_INTERNAL_ASSERT(ipexTensor.layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipex_tensor_impl != nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipex_tensor_impl->has_storage()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipexTensor.layout() == c10::kStrided); auto ipex_tensor_storage = ipex_tensor_impl->storage().unsafeGetStorageImpl(); ipex_tensor_storage->data_ptr().unsafe_set_device(c10::Device(at::DeviceType::CPU)); auto _tensor = at::detail::make_tensor(ipexTensor.storage(), at::DispatchKey::CPUTensorId); @@ -266,39 +228,6 @@ at::Tensor shallowFallbackToCPUTensorImpl(const at::Tensor& ipexTensor) { } -// Upgrade CPU tensor to DPCPP Tensor. -// It will allocate new memory buffer and then duplicate the CPU tensor buffer to create new DPCPP Tensor -at::Tensor upgradeToDPCPPTensor(const at::Tensor& cpuTensor) { - TORCH_INTERNAL_ASSERT(cpuTensor.defined()); - TORCH_INTERNAL_ASSERT(!cpuTensor.is_sparse()); - TORCH_INTERNAL_ASSERT(cpuTensor.is_contiguous()); - TORCH_INTERNAL_ASSERT(cpuTensor.layout() == c10::kStrided); - TORCH_INTERNAL_ASSERT(cpuTensor.device().type() == at::DeviceType::CPU); - if (cpuTensor.device().type() == at::DeviceType::DPCPP) { - return cpuTensor; - } - - auto* allocator = c10::GetAllocator(c10::DeviceType::DPCPP); - int64_t nelements = cpuTensor.numel(); - auto dtype = cpuTensor.dtype(); - int64_t data_size = nelements * dtype.itemsize(); - auto storage_impl = c10::make_intrusive( - dtype, - nelements, - allocator->allocate(data_size), - allocator, - /*resizeable=*/true); - memcpy(storage_impl->data(), cpuTensor.unsafeGetTensorImpl()->data(), data_size); - auto&& _tensor = at::detail::make_tensor(storage_impl, at::DispatchKey::DPCPPTensorId); - auto _tensor_sizes = cpuTensor.sizes(); - if (_tensor_sizes.size() != 1 || _tensor_sizes[0] != 0) { - _tensor.unsafeGetTensorImpl()->set_sizes_contiguous(_tensor_sizes); - } - IPEXTensorImpl::CopyMetadata(_tensor.unsafeGetTensorImpl(), cpuTensor.unsafeGetTensorImpl()); - CHECK_TENSOR(_tensor, cpuTensor); - return _tensor; -} - // Upgrade CPU tensor to DPCPP Tensor with shallow copy // It will create an new DPCPP tensor but shares CPU tensor buffer // [NOTE]: Device info of Dense CPU tensor is polluted. 
@@ -307,12 +236,12 @@ at::Tensor shallowUpgradeToDPCPPTensor(const at::Tensor& cpuTensor) { return at::Tensor(); } - TORCH_INTERNAL_ASSERT(cpuTensor.device().type() == at::DeviceType::CPU); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpuTensor.device().type() == at::DeviceType::CPU); if (cpuTensor.is_sparse()) { - TORCH_INTERNAL_ASSERT(cpuTensor.layout() == c10::kSparse); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpuTensor.layout() == c10::kSparse); // [NOTE]: Use _indices and _values interfaces to bypass non-coalesced check - TORCH_INTERNAL_ASSERT(cpuTensor._indices().layout() == c10::kStrided); - TORCH_INTERNAL_ASSERT(cpuTensor._values().layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpuTensor._indices().layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpuTensor._values().layout() == c10::kStrided); auto&& ipex_indices = shallowUpgradeToDPCPPTensor(cpuTensor._indices()); auto&& ipex_values = shallowUpgradeToDPCPPTensor(cpuTensor._values()); // Create ipex sparse tensor and copy meta data from cpu sparse tensor @@ -327,19 +256,19 @@ at::Tensor shallowUpgradeToDPCPPTensor(const at::Tensor& cpuTensor) { return _tensor; } else { auto *cpu_tensor_impl = cpuTensor.unsafeGetTensorImpl(); - TORCH_INTERNAL_ASSERT(cpu_tensor_impl != nullptr); - TORCH_INTERNAL_ASSERT(cpu_tensor_impl->has_storage()); - TORCH_INTERNAL_ASSERT(cpuTensor.layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpu_tensor_impl != nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpu_tensor_impl->has_storage()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpuTensor.layout() == c10::kStrided); auto cpu_storage = cpu_tensor_impl->storage().unsafeGetStorageImpl(); // [NOTE]: If the deleter of DPCPP::CPU is different form CPU deleter, we need to call // compare_exchange_deleter of DataPtr to update deleter cpu_storage->data_ptr().unsafe_set_device(c10::Device(at::DeviceType::DPCPP)); auto _tensor = at::detail::make_tensor(cpuTensor.storage(), at::DispatchKey::DPCPPTensorId); - TORCH_INTERNAL_ASSERT(_tensor.device().type() == at::DeviceType::DPCPP); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(_tensor.device().type() == at::DeviceType::DPCPP); IPEXTensorImpl* ipex_impl = (IPEXTensorImpl *)_tensor.unsafeGetTensorImpl(); ipex_impl->copy_meta_info(cpu_tensor_impl); - TORCH_INTERNAL_ASSERT(! cpuTensor.requires_grad()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(! 
cpuTensor.requires_grad()); CHECK_TENSOR_CRITICAL(_tensor, cpuTensor, true); //TODO: Cannot set reserved_ // dest_impl->reserved_ = src_impl->reserved_; @@ -350,25 +279,25 @@ at::Tensor shallowUpgradeToDPCPPTensor(const at::Tensor& cpuTensor) { at::Tensor shallowUpgradeToDPCPPTensorA(const at::Tensor& ipexTensor, const at::Tensor& cpuTensor) { - TORCH_INTERNAL_ASSERT(ipexTensor.defined()); - TORCH_INTERNAL_ASSERT(cpuTensor.defined()); - TORCH_INTERNAL_ASSERT(!ipexTensor.is_sparse()); - TORCH_INTERNAL_ASSERT(!cpuTensor.is_sparse()); - TORCH_INTERNAL_ASSERT(ipexTensor.layout() == c10::kStrided); - TORCH_INTERNAL_ASSERT(cpuTensor.layout() == c10::kStrided); - TORCH_INTERNAL_ASSERT(ipexTensor.device().type() == at::DeviceType::DPCPP); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipexTensor.defined()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpuTensor.defined()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!ipexTensor.is_sparse()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!cpuTensor.is_sparse()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipexTensor.layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpuTensor.layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipexTensor.device().type() == at::DeviceType::DPCPP); ipexTensor.unsafeGetTensorImpl()->storage().unsafeGetStorageImpl()->data_ptr().unsafe_set_device(c10::Device(at::DeviceType::DPCPP)); - TORCH_INTERNAL_ASSERT(ipexTensor.storage().device_type() == at::DeviceType::DPCPP); - TORCH_INTERNAL_ASSERT(cpuTensor.device().type() == at::DeviceType::CPU); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipexTensor.storage().device_type() == at::DeviceType::DPCPP); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpuTensor.device().type() == at::DeviceType::CPU); // The ipexTensor and cpuTensor shares same storage. - TORCH_INTERNAL_ASSERT(cpuTensor.storage().device_type() == at::DeviceType::DPCPP); - TORCH_INTERNAL_ASSERT(ipexTensor.storage().data() == cpuTensor.storage().data()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpuTensor.storage().device_type() == at::DeviceType::DPCPP); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipexTensor.storage().data() == cpuTensor.storage().data()); auto _tensor = at::detail::make_tensor(at::Storage(ipexTensor.storage()), at::DispatchKey::DPCPPTensorId); - TORCH_INTERNAL_ASSERT(_tensor.device().type() == at::DeviceType::DPCPP); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(_tensor.device().type() == at::DeviceType::DPCPP); IPEXTensorImpl* ipex_impl = (IPEXTensorImpl *)_tensor.unsafeGetTensorImpl(); ipex_impl->copy_meta_info(cpuTensor.unsafeGetTensorImpl()); ipex_impl->copy_auto_grad(cpuTensor.unsafeGetTensorImpl()); @@ -382,25 +311,25 @@ at::Tensor shallowUpgradeToDPCPPTensorA(const at::Tensor& ipexTensor, const at:: // Upgrade CPU tensor to DPCPP Tensor with shallow copy // It will not create an new DPCPP tensor but shares CPU tensor buffer const at::Tensor& shallowUpgradeToDPCPPTensorAW(const at::Tensor& ipexTensor, const at::Tensor& cpuTensor) { - TORCH_INTERNAL_ASSERT(ipexTensor.defined()); - TORCH_INTERNAL_ASSERT(cpuTensor.defined()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipexTensor.defined()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpuTensor.defined()); // The dispatch priority of DPCPPTensorId is higher than other CPU tensor ids. So if a tensor is CPU and // another tensor is DPCPP, it still will be disptached to DPCPP OPs. // ex, a = tensor(1, device='dpcpp')), a.to('cpu') // The above code will call AtenIpexCPUDefault::copy_ and "self" parameter is cpu tensor and "src" parameter is dpcpp tensor. 
if (ipexTensor.device().type() == cpuTensor.device().type()) { - TORCH_INTERNAL_ASSERT(cpuTensor.device().type() == at::DeviceType::CPU); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpuTensor.device().type() == at::DeviceType::CPU); return ipexTensor; } - TORCH_INTERNAL_ASSERT(cpuTensor.device().type() == at::DeviceType::CPU); - TORCH_INTERNAL_ASSERT(ipexTensor.device().type() == at::DeviceType::DPCPP); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpuTensor.device().type() == at::DeviceType::CPU); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipexTensor.device().type() == at::DeviceType::DPCPP); if (ipexTensor.is_sparse()) { - TORCH_INTERNAL_ASSERT(cpuTensor.is_sparse()); - TORCH_INTERNAL_ASSERT(ipexTensor.layout() == c10::kSparse); - TORCH_INTERNAL_ASSERT(cpuTensor.layout() == c10::kSparse); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpuTensor.is_sparse()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipexTensor.layout() == c10::kSparse); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpuTensor.layout() == c10::kSparse); // NOTICE: // In PyTorch, alias semantics is `may alias`, not `must alias`. // e.g. some sparse 'alias marked' ops are not alias actually, @@ -426,10 +355,10 @@ const at::Tensor& shallowUpgradeToDPCPPTensorAW(const at::Tensor& ipexTensor, co CHECK_SPARSE_TENSOR_CRITICAL(ipexTensor, cpuTensor, may_alias); return ipexTensor; } else { - TORCH_INTERNAL_ASSERT(!ipexTensor.is_sparse()); - TORCH_INTERNAL_ASSERT(!cpuTensor.is_sparse()); - TORCH_INTERNAL_ASSERT(ipexTensor.layout() == c10::kStrided); - TORCH_INTERNAL_ASSERT(cpuTensor.layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!ipexTensor.is_sparse()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!cpuTensor.is_sparse()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipexTensor.layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpuTensor.layout() == c10::kStrided); auto ipex_tensor_storage_impl = ipexTensor.unsafeGetTensorImpl()->storage().unsafeGetStorageImpl(); auto cpu_tensor_storage_impl = cpuTensor.unsafeGetTensorImpl()->storage().unsafeGetStorageImpl(); @@ -441,7 +370,7 @@ const at::Tensor& shallowUpgradeToDPCPPTensorAW(const at::Tensor& ipexTensor, co ipexTensor.unsafeGetTensorImpl()->set_storage(cpuTensor.storage()); } - TORCH_INTERNAL_ASSERT(ipexTensor.data_ptr() == cpuTensor.data_ptr()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipexTensor.data_ptr() == cpuTensor.data_ptr()); // NOTE: Cannot set storage data_ptr by set_data_ptr. // set_data_ptr will release caller tensor's original data_ptr. 
It is wrong here because @@ -465,19 +394,6 @@ const at::Tensor& shallowUpgradeToDPCPPTensorAW(const at::Tensor& ipexTensor, co } -std::vector fallbackToCPUTensorList(const at::TensorList& tensor_list) { - std::vector dpcpp_tensor_vec(tensor_list.size()); - for (size_t i = 0; i < tensor_list.size(); ++i) { - const at::Tensor& tensor = tensor_list[i]; - TORCH_INTERNAL_ASSERT(tensor.defined()); - if (tensor.defined()) { - dpcpp_tensor_vec[i] = fallbackToCPUTensor(tensor); - } - } - return dpcpp_tensor_vec; -} - - std::vector shallowFallbackToCPUTensorList(const at::TensorList& tensor_list) { std::vector dpcpp_tensor_vec(tensor_list.size()); for (size_t i = 0; i < tensor_list.size(); ++i) { @@ -526,8 +442,6 @@ void reorderTensorToScalaraType(const at::Tensor& ipexTensor, at::ScalarType dst if (!check_tensor_own_whole_storage(ipexTensor)) { return; - } else { - TORCH_INTERNAL_ASSERT(false); } if (check_tensor_own_shade_context(ipexTensor)) { @@ -558,26 +472,13 @@ void reorderTensorToScalaraType(const at::Tensor& ipexTensor, at::ScalarType dst ipexTensor.unsafeGetTensorImpl()->set_storage(storage_impl); } -std::vector upgradeToDPCPPTensorVec(const std::vector &tensor_vec) { - std::vector ret_dpcpp_tensor_vec; - for (size_t i = 0; i < tensor_vec.size(); i++) { - auto&& cur_tensor = tensor_vec[i]; - TORCH_INTERNAL_ASSERT(cur_tensor.defined()); - TORCH_INTERNAL_ASSERT(cur_tensor.layout() == c10::kStrided); - TORCH_INTERNAL_ASSERT(cur_tensor.is_contiguous()); - auto&& cur_dpcpp_tensor = upgradeToDPCPPTensor(cur_tensor); - ret_dpcpp_tensor_vec.push_back(cur_dpcpp_tensor); - } - return ret_dpcpp_tensor_vec; -} - std::vector shallowUpgradeToDPCPPTensorVec(const std::vector &tensor_vec) { std::vector ret_dpcpp_tensor_vec; for (size_t i = 0; i < tensor_vec.size(); i++) { auto&& cur_tensor = tensor_vec[i]; - TORCH_INTERNAL_ASSERT(cur_tensor.defined()); - TORCH_INTERNAL_ASSERT(cur_tensor.layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cur_tensor.defined()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cur_tensor.layout() == c10::kStrided); auto&& cur_dpcpp_tensor = shallowUpgradeToDPCPPTensor(cur_tensor); ret_dpcpp_tensor_vec.push_back(cur_dpcpp_tensor); } diff --git a/torch_ipex/csrc/aten_ipex_bridge.h b/torch_ipex/csrc/aten_ipex_bridge.h index 02ce9822d..bed667198 100644 --- a/torch_ipex/csrc/aten_ipex_bridge.h +++ b/torch_ipex/csrc/aten_ipex_bridge.h @@ -10,9 +10,7 @@ namespace torch_ipex { namespace bridge { // Convert DPCPP tensor to CPU tensor -at::Tensor fallbackToCPUTensor(const at::Tensor& ipexTensor); at::Tensor shallowFallbackToCPUTensor(const at::Tensor& ipexTensor); -std::vector fallbackToCPUTensorList(const at::TensorList&); std::vector shallowFallbackToCPUTensorList(const at::TensorList&); void attachShadeDataConext(const at::Tensor& tensor); @@ -51,9 +49,7 @@ void reorderTensorToScalarTypeForDNNL(const at::Tensor& ipexTensor, at::ScalarTy void reorderTensorToScalaraType(const at::Tensor& ipexTensor, at::ScalarType dstScalarType); // Convert CPU tensor to DPCPP tensor -at::Tensor upgradeToDPCPPTensor(const at::Tensor& ipexTensor); at::Tensor shallowUpgradeToDPCPPTensor(const at::Tensor& ipexTensor); -std::vector upgradeToDPCPPTensorVec(const std::vector &); std::vector shallowUpgradeToDPCPPTensorVec(const std::vector &); // The last character A means alias. 
This function is for aten alias diff --git a/torch_ipex/csrc/cpu/DevOPs.cpp b/torch_ipex/csrc/cpu/DevOPs.cpp index ac7caaa39..6d8ca4dbc 100644 --- a/torch_ipex/csrc/cpu/DevOPs.cpp +++ b/torch_ipex/csrc/cpu/DevOPs.cpp @@ -28,10 +28,10 @@ namespace cpu { #define DEBUG(fmt) #endif -#define CHECK_DNNL_OP_PRE_COND(tensor) \ - TORCH_INTERNAL_ASSERT(tensor.defined()); \ - TORCH_INTERNAL_ASSERT(tensor.is_contiguous()); \ - TORCH_INTERNAL_ASSERT(tensor.layout() == c10::kStrided) +#define CHECK_DNNL_OP_PRE_COND(tensor) \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(tensor.defined()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(tensor.is_contiguous()); \ + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(tensor.layout() == c10::kStrided) at::Tensor AtenIpexCPUDev::dil_convolution( const at::Tensor & input, @@ -41,6 +41,7 @@ at::Tensor AtenIpexCPUDev::dil_convolution( at::IntArrayRef padding, at::IntArrayRef dilation, int64_t groups) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(false); DEBUG("AtenIpexCPUDev::dil_convolution\n"); dil::tensor dil_input; dil::tensor dil_weight; @@ -175,18 +176,18 @@ at::Tensor AtenIpexCPUDev::dil_convolution_overrideable(const at::Tensor & input at::Tensor AtenIpexCPUDev::mkldnn_convolution(const at::Tensor & self, const at::Tensor & weight, const at::Tensor & bias, at::IntArrayRef padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups) { DEBUG("AtenIpexCPUDev::mkldnn_convolution\n"); - TORCH_INTERNAL_ASSERT(self.defined()); - TORCH_INTERNAL_ASSERT(weight.defined()); - TORCH_INTERNAL_ASSERT(self.layout() == c10::kStrided); - TORCH_INTERNAL_ASSERT(weight.layout() == c10::kStrided); - TORCH_INTERNAL_ASSERT(!(bias.defined()) || (bias.defined() && bias.layout() == c10::kStrided)); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(self.defined()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(weight.defined()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(self.layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(weight.layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!(bias.defined()) || (bias.defined() && bias.layout() == c10::kStrided)); auto&& _ipex_self = bridge::shallowFallbackToCPUTensor(self); auto&& _ipex_weight = bridge::shallowFallbackToCPUTensor(weight); auto&& _ipex_bias = bridge::shallowFallbackToCPUTensor(bias); auto&& _ipex_result = at::mkldnn_convolution(_ipex_self.contiguous(), _ipex_weight.contiguous(), _ipex_bias.contiguous(), padding, stride, dilation, groups); static_cast(_ipex_result); // Avoid warnings in case not used - TORCH_INTERNAL_ASSERT(_ipex_result.is_contiguous()); - TORCH_INTERNAL_ASSERT(_ipex_result.layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(_ipex_result.is_contiguous()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(_ipex_result.layout() == c10::kStrided); return bridge::shallowUpgradeToDPCPPTensor(_ipex_result); } @@ -210,12 +211,12 @@ std::tuple AtenIpexCPUDev::dil_convolution_bac std::tuple AtenIpexCPUDev::mkldnn_convolution_backward(const at::Tensor & self, const at::Tensor & grad_output, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, std::array output_mask) { DEBUG("AtenIpexCPUDev::mkldnn_convolution_backward\n"); - TORCH_INTERNAL_ASSERT(self.defined()); - TORCH_INTERNAL_ASSERT(grad_output.defined()); - TORCH_INTERNAL_ASSERT(weight.defined()); - TORCH_INTERNAL_ASSERT(self.layout() == c10::kStrided); - TORCH_INTERNAL_ASSERT(grad_output.layout() == c10::kStrided); - TORCH_INTERNAL_ASSERT(weight.layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(self.defined()); + 
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad_output.defined()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(weight.defined()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(self.layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad_output.layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(weight.layout() == c10::kStrided); auto&& _ipex_self = bridge::shallowFallbackToCPUTensor(self); auto&& _ipex_grad_output = bridge::shallowFallbackToCPUTensor(grad_output); auto&& _ipex_weight = bridge::shallowFallbackToCPUTensor(weight); diff --git a/torch_ipex/csrc/cpu/ExtendOPs.cpp b/torch_ipex/csrc/cpu/ExtendOPs.cpp index 636bef071..69d08d1bb 100644 --- a/torch_ipex/csrc/cpu/ExtendOPs.cpp +++ b/torch_ipex/csrc/cpu/ExtendOPs.cpp @@ -13,19 +13,19 @@ namespace torch_ipex { void AtenIpexTypeExt::packed_add_(at::Tensor & top_half, at::Tensor & bot_half, const at::Tensor & grad, float alpha) { - TORCH_INTERNAL_ASSERT(grad.scalar_type() == at::ScalarType::BFloat16); - TORCH_INTERNAL_ASSERT(top_half.scalar_type() == at::ScalarType::BFloat16); - TORCH_INTERNAL_ASSERT(bot_half.scalar_type() == at::ScalarType::BFloat16); - TORCH_INTERNAL_ASSERT(grad.device().type() == at::DeviceType::DPCPP); - TORCH_INTERNAL_ASSERT(top_half.device().type() == at::DeviceType::DPCPP); - TORCH_INTERNAL_ASSERT(bot_half.device().type() == at::DeviceType::DPCPP); - TORCH_INTERNAL_ASSERT(top_half.sizes() == bot_half.sizes()); - TORCH_INTERNAL_ASSERT(top_half.is_contiguous()); - TORCH_INTERNAL_ASSERT(bot_half.is_contiguous()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad.scalar_type() == at::ScalarType::BFloat16); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(top_half.scalar_type() == at::ScalarType::BFloat16); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(bot_half.scalar_type() == at::ScalarType::BFloat16); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad.device().type() == at::DeviceType::DPCPP); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(top_half.device().type() == at::DeviceType::DPCPP); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(bot_half.device().type() == at::DeviceType::DPCPP); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(top_half.sizes() == bot_half.sizes()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(top_half.is_contiguous()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(bot_half.is_contiguous()); RECORD_FUNCTION("packed_add_", std::vector({top_half, bot_half, grad, alpha}), torch::autograd::Node::peek_at_next_sequence_nr()); if (grad.is_sparse()) { - TORCH_INTERNAL_ASSERT(top_half.dim() == 2); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(top_half.dim() == 2); auto sparse_nnz = grad._nnz(); auto sparse_dim = grad.sparse_dim(); auto values = grad._values(); @@ -34,14 +34,14 @@ void AtenIpexTypeExt::packed_add_(at::Tensor & top_half, at::Tensor & bot_half, auto feature_size = values.stride(0); auto indices_accessor = indices.accessor(); - TORCH_INTERNAL_ASSERT(values.is_contiguous()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(values.is_contiguous()); auto value_ptr = values.data_ptr(); auto top_half_ptr = top_half.data_ptr(); auto bot_half_ptr = bot_half.data_ptr(); - TORCH_INTERNAL_ASSERT(value_ptr != nullptr); - TORCH_INTERNAL_ASSERT(top_half_ptr != nullptr); - TORCH_INTERNAL_ASSERT(bot_half_ptr != nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(value_ptr != nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(top_half_ptr != nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(bot_half_ptr != nullptr); std::vector sparse_stride(sparse_dim); for (int64_t d = 0; d < sparse_dim; d++) { @@ -80,7 +80,7 @@ void AtenIpexTypeExt::packed_add_(at::Tensor & top_half, at::Tensor & bot_half, } }); } else { - TORCH_INTERNAL_ASSERT(grad.is_contiguous()); + 
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad.is_contiguous()); //TODO: vector implementation basing on vector size union packed_bf16 { unsigned short s[2]; @@ -201,15 +201,15 @@ inline at::Tensor _interaction_forward(const std::vector & input) { std::vector feature_sizes(input.size()); std::vector input_data(input.size()); for (int i = 0; i < input.size(); i++) { - TORCH_INTERNAL_ASSERT(input[i].is_contiguous()); - TORCH_INTERNAL_ASSERT(input[i].device().is_dpcpp()); - TORCH_INTERNAL_ASSERT(input[i].dim() == 2); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(input[i].is_contiguous()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(input[i].device().is_dpcpp()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(input[i].dim() == 2); feature_sizes[i] = input[i].sizes()[1]; total_feature_size += input[i].sizes()[1]; input_data[i] = input[i].data_ptr(); } auto vector_nums = total_feature_size / vector_size; - TORCH_INTERNAL_ASSERT(total_feature_size % vector_size == 0); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(total_feature_size % vector_size == 0); auto interact_feature_size = vector_nums * (vector_nums - 1) / 2; auto tr_vector_size = sizeof(T) == 4 ? vector_size : vector_size / 2; auto out = at::empty({batch_size, interact_feature_size + vector_size}, input[0].options()); @@ -239,7 +239,7 @@ inline at::Tensor _interaction_forward(const std::vector & input) { template inline std::vector _interaction_backward(const at::Tensor & grad_out, const std::vector & input) { - TORCH_INTERNAL_ASSERT(grad_out.is_contiguous()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad_out.is_contiguous()); RECORD_FUNCTION("_interaction_backward", std::vector({grad_out, input}), torch::autograd::Node::peek_at_next_sequence_nr()); uint32_t total_feature_size = 0; int64_t batch_size = input[0].sizes()[0]; @@ -257,7 +257,7 @@ inline std::vector _interaction_backward(const at::Tensor & grad_out output_data[i] = output[i].data_ptr(); } auto vector_nums = total_feature_size / vector_size; - TORCH_INTERNAL_ASSERT(total_feature_size % vector_size == 0); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(total_feature_size % vector_size == 0); auto interact_feature_size = vector_nums * (vector_nums - 1) / 2; auto grad_out_data = grad_out.data_ptr(); @@ -305,11 +305,11 @@ inline std::vector _interaction_backward(const at::Tensor & grad_out at::Tensor AtenIpexTypeExt::interaction_forward(const std::vector & input) { if (input[0].scalar_type() == at::kFloat) { - for (const auto &in : input) { TORCH_INTERNAL_ASSERT(in.scalar_type() == at::kFloat); } + for (const auto &in : input) { TORCH_INTERNAL_ASSERT_DEBUG_ONLY(in.scalar_type() == at::kFloat); } return _interaction_forward(input); } else { - TORCH_INTERNAL_ASSERT(input[0].scalar_type() == at::kBFloat16); - for (const auto &in : input) { TORCH_INTERNAL_ASSERT(in.scalar_type() == at::kBFloat16); } + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(input[0].scalar_type() == at::kBFloat16); + for (const auto &in : input) { TORCH_INTERNAL_ASSERT_DEBUG_ONLY(in.scalar_type() == at::kBFloat16); } return _interaction_forward(input); } } @@ -318,18 +318,18 @@ std::vector AtenIpexTypeExt::interaction_backward(const at::Tensor & if (grad_out.scalar_type() == at::kFloat) { return _interaction_backward(grad_out, input); } else { - TORCH_INTERNAL_ASSERT(grad_out.scalar_type() == at::kBFloat16); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad_out.scalar_type() == at::kBFloat16); return _interaction_backward(grad_out, input); } } template static inline at::Tensor _embedding_bag_forward(const at::Tensor &weights, const at::Tensor &inputs, const at::Tensor &offsets) { - 
TORCH_INTERNAL_ASSERT(weights.is_contiguous()); - TORCH_INTERNAL_ASSERT(inputs.is_contiguous()); - TORCH_INTERNAL_ASSERT(offsets.is_contiguous()); - TORCH_INTERNAL_ASSERT(inputs.dim() == 1); - TORCH_INTERNAL_ASSERT(weights.dim() == 2); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(weights.is_contiguous()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(inputs.is_contiguous()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(offsets.is_contiguous()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(inputs.dim() == 1); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(weights.dim() == 2); RECORD_FUNCTION("_embedding_bag_forward", std::vector({weights, inputs, offsets}), torch::autograd::Node::peek_at_next_sequence_nr()); auto batch_size = offsets.size(0); auto num_input = inputs.size(0); @@ -345,7 +345,7 @@ static inline at::Tensor _embedding_bag_forward(const at::Tensor &weights, const auto inputs_start = offsets_data[i]; auto inputs_end = (i < batch_size - 1) ? offsets_data[i + 1] : num_input; // TODO: add acc_t support for bag size larger than 1 - TORCH_INTERNAL_ASSERT(inputs_end - inputs_start == 1); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(inputs_end - inputs_start == 1); auto out_data_ptr = &output_data[i * vector_size]; #pragma omp simd for (int64_t v = 0; v < vector_size; v++) out_data_ptr[v] = 0.0; @@ -361,8 +361,8 @@ static inline at::Tensor _embedding_bag_forward(const at::Tensor &weights, const template static inline at::Tensor _embedding_bag_backward(const at::Tensor &grad_out, const at::Tensor &weights, const at::Tensor &inputs, const at::Tensor offsets) { - TORCH_INTERNAL_ASSERT(inputs.dim() == 1); - TORCH_INTERNAL_ASSERT(grad_out.dim() == 2); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(inputs.dim() == 1); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad_out.dim() == 2); RECORD_FUNCTION("_embedding_bag_backward", std::vector({grad_out, weights, inputs, offsets}), torch::autograd::Node::peek_at_next_sequence_nr()); auto batch_size = offsets.size(0); auto num_input = inputs.size(0); @@ -408,7 +408,7 @@ at::Tensor AtenIpexTypeExt::embedding_bag_forward(const at::Tensor &weights, con if (weights.scalar_type() == at::kFloat) { return _embedding_bag_forward(weights, inputs, offsets); } else { - TORCH_INTERNAL_ASSERT(weights.scalar_type() == at::kBFloat16); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(weights.scalar_type() == at::kBFloat16); return _embedding_bag_forward(weights, inputs, offsets); } } @@ -418,7 +418,7 @@ at::Tensor AtenIpexTypeExt::embedding_bag_backward(const at::Tensor &grad_out, if (grad_out.scalar_type() == at::kFloat) { return _embedding_bag_backward(grad_out, weights, inputs, offsets); } else { - TORCH_INTERNAL_ASSERT(grad_out.scalar_type() == at::kBFloat16); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad_out.scalar_type() == at::kBFloat16); return _embedding_bag_backward(grad_out, weights, inputs, offsets); } } diff --git a/torch_ipex/csrc/cpu/MlpOPs.cpp b/torch_ipex/csrc/cpu/MlpOPs.cpp index 017a6c5ec..4d3137955 100644 --- a/torch_ipex/csrc/cpu/MlpOPs.cpp +++ b/torch_ipex/csrc/cpu/MlpOPs.cpp @@ -93,7 +93,7 @@ std::vector AtenIpexTypeMLPExt::backward( const at::Tensor &grad_output, const at::Tensor &input, const at::Tensor &weight) { - TORCH_INTERNAL_ASSERT(libxsmm_handle_ != nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(libxsmm_handle_ != nullptr); libxsmm_dnn_err_t global_status; auto nbn = input.size(0); auto nbc = input.size(1); @@ -173,12 +173,12 @@ void *AtenIpexTypeMLPExt::create_handle(int N, int C, int K, int bn, int bc, int } at::Tensor AtenIpexTypeMLPExt::set_relu_mask(void *libxsmm_handle_) { - TORCH_INTERNAL_ASSERT(libxsmm_handle_ != nullptr); + 
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(libxsmm_handle_ != nullptr); libxsmm_dnn_fullyconnected *handle = (libxsmm_dnn_fullyconnected*)libxsmm_handle_; libxsmm_dnn_err_t status; libxsmm_dnn_err_t global_status; libxsmm_dnn_tensor_datalayout* layout = libxsmm_dnn_fullyconnected_create_tensor_datalayout(handle, LIBXSMM_DNN_RELU_MASK, &status); CHKERR_LIBXSMM_DNN( status ); - TORCH_INTERNAL_ASSERT(layout != nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout != nullptr); std::vector dim_size; for (int i = layout->num_dims - 1; i >= 0; i--) { dim_size.push_back(layout->dim_size[i]); @@ -192,7 +192,7 @@ at::Tensor AtenIpexTypeMLPExt::set_relu_mask(void *libxsmm_handle_) { } void AtenIpexTypeMLPExt::release_handle(void* libxsmm_handle_) { - TORCH_INTERNAL_ASSERT(libxsmm_handle_ != nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(libxsmm_handle_ != nullptr); libxsmm_dnn_err_t global_status; libxsmm_dnn_err_t status; libxsmm_dnn_fullyconnected* libxsmm_handle = (libxsmm_dnn_fullyconnected*)libxsmm_handle_; diff --git a/torch_ipex/csrc/cpu/ShadeDataContext.h b/torch_ipex/csrc/cpu/ShadeDataContext.h index ace9ce5dd..37216a83e 100644 --- a/torch_ipex/csrc/cpu/ShadeDataContext.h +++ b/torch_ipex/csrc/cpu/ShadeDataContext.h @@ -27,17 +27,17 @@ struct ShadeDataContext { ~ShadeDataContext() { if (this->data_type == SHADE_DATA_TYPE::DIL) { // DIL Tensor if (this->dil_tensor.is_public_format()) { - TORCH_INTERNAL_ASSERT(this->cpu_raw_data != nullptr); - TORCH_INTERNAL_ASSERT(this->dil_tensor.get_data_handle() == this->cpu_raw_data); - TORCH_INTERNAL_ASSERT(this->cpu_del_fun == &(c10::detail::deleteNothing)); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(this->cpu_raw_data != nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(this->dil_tensor.get_data_handle() == this->cpu_raw_data); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(this->cpu_del_fun == &(c10::detail::deleteNothing)); } else { // If dil tensor is block format, the cpu raw data means nothing here. 
- TORCH_INTERNAL_ASSERT(this->cpu_raw_data == nullptr); - TORCH_INTERNAL_ASSERT(this->cpu_del_fun == nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(this->cpu_raw_data == nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(this->cpu_del_fun == nullptr); } } else { // CPU Tensor here - TORCH_INTERNAL_ASSERT(this->cpu_del_fun != nullptr); - TORCH_INTERNAL_ASSERT(this->cpu_del_fun != &(c10::detail::deleteNothing)); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(this->cpu_del_fun != nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(this->cpu_del_fun != &(c10::detail::deleteNothing)); this->cpu_del_fun(this->cpu_raw_data); this->cpu_raw_data = nullptr; } @@ -49,10 +49,10 @@ struct ShadeDataContext { * @param raw_data Raw pointer of @class ShadeDataContext */ static void freeShadeDataContext(void *raw_data) { - TORCH_INTERNAL_ASSERT(raw_data != nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(raw_data != nullptr); ShadeDataContext *shade_data_ctx = (ShadeDataContext*)raw_data; auto data_type = shade_data_ctx->data_type; - TORCH_INTERNAL_ASSERT((data_type == SHADE_DATA_TYPE::CPU_RAW) || (data_type == SHADE_DATA_TYPE::DIL)); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY((data_type == SHADE_DATA_TYPE::CPU_RAW) || (data_type == SHADE_DATA_TYPE::DIL)); delete shade_data_ctx; } @@ -74,11 +74,11 @@ struct ShadeDataContext { * only contains DNNL buffer, it obiviouly is DNNL tensor */ static inline bool isDilTensor(const at::Tensor &tensor) { - TORCH_INTERNAL_ASSERT(tensor.has_storage()); - TORCH_INTERNAL_ASSERT(tensor.layout() == c10::Layout::Strided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(tensor.has_storage()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(tensor.layout() == c10::Layout::Strided); if (tensor.device().type() != c10::DeviceType::DPCPP) { - TORCH_INTERNAL_ASSERT(tensor.device().type() == c10::DeviceType::CPU); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(tensor.device().type() == c10::DeviceType::CPU); return false; } @@ -87,17 +87,17 @@ struct ShadeDataContext { void *storage_context = tensor.storage().data_ptr().get_context(); ShadeDataContext *shade_data_context = (ShadeDataContext*)storage_context; auto data_type = shade_data_context->data_type; - TORCH_INTERNAL_ASSERT((data_type == SHADE_DATA_TYPE::CPU_RAW) || (data_type == SHADE_DATA_TYPE::DIL)); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY((data_type == SHADE_DATA_TYPE::CPU_RAW) || (data_type == SHADE_DATA_TYPE::DIL)); if (data_type == SHADE_DATA_TYPE::DIL) { auto raw_cpu_data = tensor.storage().data_ptr().get(); if (raw_cpu_data == nullptr) { // the dnnl tensor does not share data with raw tensor data. - TORCH_INTERNAL_ASSERT(! (shade_data_context->dil_tensor.is_empty())); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(! (shade_data_context->dil_tensor.is_empty())); return true; } else { // The dnnl tensor shares some data with raw tensor. - TORCH_INTERNAL_ASSERT(shade_data_context->dil_tensor.is_public_format()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(shade_data_context->dil_tensor.is_public_format()); // For the case: // 1. There is a tensor named A @@ -111,7 +111,7 @@ struct ShadeDataContext { // All these tensors share same buffer of Tensor A with different storge offsets and elements. // So the context modification will impact all these tensors. if (check_tensor_own_whole_storage(tensor)) { - TORCH_INTERNAL_ASSERT(shade_data_context->dil_tensor.get_size() == tensor.storage().capacity()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(shade_data_context->dil_tensor.get_size() == tensor.storage().capacity()); return true; } } @@ -138,10 +138,10 @@ struct ShadeDataContext { * an empty DNNL buffer. 
The caller should check the return buffer is empty or not. */ static inline dil::tensor getDilTensor(const at::Tensor &tensor) { - TORCH_INTERNAL_ASSERT(tensor.has_storage()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(tensor.has_storage()); void *raw_context = tensor.storage().data_ptr().get_context(); - TORCH_INTERNAL_ASSERT(raw_context != nullptr); - TORCH_INTERNAL_ASSERT(isDilTensor(tensor)); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(raw_context != nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(isDilTensor(tensor)); ShadeDataContext *shade_data_context = (ShadeDataContext*)raw_context; return shade_data_context->dil_tensor; } @@ -155,15 +155,15 @@ struct ShadeDataContext { * and return it to the caller. Otherwise, the function will return nullptr */ static inline void * getCpuRawData(const at::Tensor &tensor) { - TORCH_INTERNAL_ASSERT(tensor.has_storage()); - TORCH_INTERNAL_ASSERT(tensor.unsafeGetTensorImpl()->unique_version()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(tensor.has_storage()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(tensor.unsafeGetTensorImpl()->unique_version()); if (isCpuTensor(tensor)) { auto& data_ptr = tensor.storage().data_ptr(); ShadeDataContext *shade_data_context = (ShadeDataContext*)(data_ptr.get_context()); - TORCH_INTERNAL_ASSERT(shade_data_context != nullptr); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(shade_data_context != nullptr); return shade_data_context->cpu_raw_data; } else { - TORCH_INTERNAL_ASSERT(false); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(false); return nullptr; } } diff --git a/torch_ipex/csrc/cpu/SparseAttr.cpp b/torch_ipex/csrc/cpu/SparseAttr.cpp index d7d92a2c6..8dab5ce5c 100644 --- a/torch_ipex/csrc/cpu/SparseAttr.cpp +++ b/torch_ipex/csrc/cpu/SparseAttr.cpp @@ -5,59 +5,59 @@ namespace torch_ipex { namespace cpu { int64_t AtenIpexCPUSparse::sparse_dim(const at::Tensor & self) { - TORCH_INTERNAL_ASSERT(self.layout() == c10::kSparse); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(self.layout() == c10::kSparse); return IPEXSparseTensorImpl::get_ipex_sparse_impl(self)->sparse_dim(); } int64_t AtenIpexCPUSparse::dense_dim(const at::Tensor & self) { - TORCH_INTERNAL_ASSERT(self.layout() == c10::kSparse); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(self.layout() == c10::kSparse); return IPEXSparseTensorImpl::get_ipex_sparse_impl(self)->dense_dim(); } int64_t AtenIpexCPUSparse::_dimI(const at::Tensor & self) { - TORCH_INTERNAL_ASSERT(self.layout() == c10::kSparse); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(self.layout() == c10::kSparse); return IPEXSparseTensorImpl::get_ipex_sparse_impl(self)->sparse_dim(); } int64_t AtenIpexCPUSparse::_dimV(const at::Tensor & self) { - TORCH_INTERNAL_ASSERT(self.layout() == c10::kSparse); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(self.layout() == c10::kSparse); return IPEXSparseTensorImpl::get_ipex_sparse_impl(self)->dense_dim(); } int64_t AtenIpexCPUSparse::_nnz(const at::Tensor & self) { - TORCH_INTERNAL_ASSERT(self.layout() == c10::kSparse); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(self.layout() == c10::kSparse); return IPEXSparseTensorImpl::get_ipex_sparse_impl(self)->nnz(); } bool AtenIpexCPUSparse::is_coalesced(const at::Tensor & self) { - TORCH_INTERNAL_ASSERT(self.layout() == c10::kSparse); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(self.layout() == c10::kSparse); return IPEXSparseTensorImpl::get_ipex_sparse_impl(self)->coalesced(); } at::Tensor & AtenIpexCPUSparse::_coalesced_(at::Tensor & self, bool coalesced) { - TORCH_INTERNAL_ASSERT(self.layout() == c10::kSparse); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(self.layout() == c10::kSparse); 
IPEXSparseTensorImpl::get_ipex_sparse_impl(self)->set_coalesced(coalesced); return self; } at::Tensor AtenIpexCPUSparse::_indices(const at::Tensor & self) { - TORCH_INTERNAL_ASSERT(self.layout() == c10::kSparse); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(self.layout() == c10::kSparse); return IPEXSparseTensorImpl::get_ipex_sparse_impl(self)->indices(); } at::Tensor AtenIpexCPUSparse::_values(const at::Tensor & self) { - TORCH_INTERNAL_ASSERT(self.layout() == c10::kSparse); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(self.layout() == c10::kSparse); return IPEXSparseTensorImpl::get_ipex_sparse_impl(self)->values(); } at::Tensor AtenIpexCPUSparse::indices(const at::Tensor& self) { - TORCH_INTERNAL_ASSERT(self.is_coalesced(), + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(self.is_coalesced(), "Cannot get indices on an uncoalesced tensor, please call .coalesce() first"); return IPEXSparseTensorImpl::get_ipex_sparse_impl(self)->indices().alias(); } at::Tensor AtenIpexCPUSparse::values(const at::Tensor& self) { - TORCH_INTERNAL_ASSERT(self.is_coalesced(), + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(self.is_coalesced(), "Cannot get values on an uncoalesced tensor, please call .coalesce() first"); return IPEXSparseTensorImpl::get_ipex_sparse_impl(self)->values().alias(); } diff --git a/torch_ipex/csrc/cpu/dbl/Common.cpp b/torch_ipex/csrc/cpu/dbl/Common.cpp index bf338402d..7432d3a9e 100644 --- a/torch_ipex/csrc/cpu/dbl/Common.cpp +++ b/torch_ipex/csrc/cpu/dbl/Common.cpp @@ -24,8 +24,8 @@ dil::tensor dil_tensor_from_dense(const at::Tensor& tensor) { } at::Tensor dil_tensor_to_dense(const at::Tensor& tensor) { - TORCH_INTERNAL_ASSERT(cpu::ShadeDataContext::isDilTensor(tensor)); - TORCH_INTERNAL_ASSERT(tensor.unsafeGetTensorImpl()->version_counter().current_version() == 1); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(cpu::ShadeDataContext::isDilTensor(tensor)); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(tensor.unsafeGetTensorImpl()->version_counter().current_version() == 1); auto dil_tensor = cpu::ShadeDataContext::getDilTensor(tensor); at::Tensor cpu_tensor = at::empty( dil_tensor.get_dims(), @@ -76,10 +76,10 @@ at::Tensor gen_aten_tensor_by(dil::tensor dil_tensor) { } else { // Blockformat does not inlcude stride information auto tensor_sizes = dil_tensor.get_dims(); - TORCH_INTERNAL_ASSERT(tensor_sizes.size() != 1 || tensor_sizes[0] != 0); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(tensor_sizes.size() != 1 || tensor_sizes[0] != 0); _tensor.unsafeGetTensorImpl()->set_sizes_contiguous(tensor_sizes); } - TORCH_INTERNAL_ASSERT(_tensor.layout() == c10::kStrided); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(_tensor.layout() == c10::kStrided); return _tensor; } @@ -95,7 +95,7 @@ at::Tensor empty_dil_tensor(at::IntArrayRef sizes, const at::TensorOptions& opti void sync_shape_from_dil_to_aten(const at::Tensor& ipex_tensor, const dil::tensor &dil_tensor) { dil::dims sizes = dil_tensor.get_dims(); dil::dims strides = dil_tensor.get_strides(); - TORCH_INTERNAL_ASSERT(ipex_tensor.device().type() == at::DeviceType::DPCPP); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipex_tensor.device().type() == at::DeviceType::DPCPP); auto* _tensor_impl = (IPEXTensorImpl *)ipex_tensor.unsafeGetTensorImpl(); _tensor_impl->force_set_strided(sizes, strides); } diff --git a/torch_ipex/csrc/ipex_sparse_tensor_impl.cpp b/torch_ipex/csrc/ipex_sparse_tensor_impl.cpp index 6c2c8fdc0..6efd2443c 100644 --- a/torch_ipex/csrc/ipex_sparse_tensor_impl.cpp +++ b/torch_ipex/csrc/ipex_sparse_tensor_impl.cpp @@ -7,7 +7,7 @@ IPEXSparseTensorImpl::IPEXSparseTensorImpl(at::DispatchKeySet type_set, const ca } 
IPEXSparseTensorImpl * IPEXSparseTensorImpl::get_ipex_sparse_impl(const at::Tensor& ipex_tensor) { - TORCH_INTERNAL_ASSERT(ipex_tensor.layout() == c10::kSparse); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(ipex_tensor.layout() == c10::kSparse); // TORCH_INTERNAL_ASSERT(ipex_tensor.device().type() == at::DeviceType::DPCPP); return static_cast(ipex_tensor.unsafeGetTensorImpl()); } diff --git a/torch_ipex/csrc/ipex_tensor_impl.cpp b/torch_ipex/csrc/ipex_tensor_impl.cpp index 851693956..934674f8c 100644 --- a/torch_ipex/csrc/ipex_tensor_impl.cpp +++ b/torch_ipex/csrc/ipex_tensor_impl.cpp @@ -70,7 +70,7 @@ void IPEXTensorImpl::reset_data_type(at::ScalarType dst_type) { void IPEXTensorImpl::copy_auto_grad(c10::TensorImpl *src_impl) { if (! src_impl->requires_grad()) { - TORCH_INTERNAL_ASSERT(! this->requires_grad()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(! this->requires_grad()); return; } diff --git a/torch_ipex/csrc/jit/fusion_pass.cpp b/torch_ipex/csrc/jit/fusion_pass.cpp index 539aba0aa..62ca9c86c 100644 --- a/torch_ipex/csrc/jit/fusion_pass.cpp +++ b/torch_ipex/csrc/jit/fusion_pass.cpp @@ -262,14 +262,14 @@ class OpFuser { // Y-merge like case // if (safe && node->inputs().size() > 1) { - TORCH_INTERNAL_ASSERT(r); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(r); auto rule = *r.value(); auto& schema = matchSchemaForFusion(rule.second, v->node(), node); auto o_schema = node->schema(); auto pos = v->node()->inputs().size(); - TORCH_INTERNAL_ASSERT(schema.arguments().size() + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(schema.arguments().size() == pos + node->inputs().size() -1); for (int i = 0; i < node->inputs().size(); ++ i) {
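
Note on the assertion change throughout this patch: TORCH_INTERNAL_ASSERT_DEBUG_ONLY (declared in c10/util/Exception.h) only performs its check when NDEBUG is not defined, which is why the patch also adds -DNDEBUG to the release flags in cmake/CPU.cmake. The sketch below is a minimal standalone illustration of that release/debug split, not the actual c10 implementation; the MY_* macro names are hypothetical stand-ins.

// Minimal sketch of the release/debug split this patch relies on.
// Illustration only: the real TORCH_INTERNAL_ASSERT_DEBUG_ONLY lives in
// c10/util/Exception.h; the MY_* names here are hypothetical stand-ins.
#include <cstdio>
#include <cstdlib>

#define MY_INTERNAL_ASSERT(cond)                            \
  do {                                                      \
    if (!(cond)) {                                          \
      std::fprintf(stderr, "assert failed: %s\n", #cond);   \
      std::abort();                                         \
    }                                                       \
  } while (0)

#ifdef NDEBUG
// Release build (-O2 -DNDEBUG): the body is unreachable, so the check is
// compiled out and the condition is never evaluated at runtime.
#define MY_INTERNAL_ASSERT_DEBUG_ONLY(cond) while (false) MY_INTERNAL_ASSERT(cond)
#else
// Debug build (-O0 -g -D_DEBUG): behaves like the unconditional assert.
#define MY_INTERNAL_ASSERT_DEBUG_ONLY(cond) MY_INTERNAL_ASSERT(cond)
#endif

int main() {
  bool layout_is_strided = true;
  MY_INTERNAL_ASSERT_DEBUG_ONLY(layout_is_strided);  // free in release builds
  MY_INTERNAL_ASSERT(layout_is_strided);             // always checked
  return 0;
}

Because the generator scripts and the bridge/DevOPs/ExtendOPs sources now emit the _DEBUG_ONLY variant, these tensor metadata checks still fire in -D_DEBUG builds but add no overhead on the -O2 -DNDEBUG release path.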