@@ -40,9 +40,9 @@ namespace cpu {
40
40
TORCH_INTERNAL_ASSERT_DEBUG_ONLY (tensor.defined()); \
41
41
TORCH_INTERNAL_ASSERT_DEBUG_ONLY (tensor.layout() == c10::kStrided )
42
42
43
- #define CHECK_ATEN_BF16_USABLE (tensor ) \
44
- ShadeDataContext::isDilTensor (tensor) && \
45
- ShadeDataContext::isTensorMixPrecision (tensor) && \
43
+ #define CHECK_ATEN_BF16_USABLE (tensor ) \
44
+ ShadeDataContext::isDilTensor (tensor) && \
45
+ ShadeDataContext::isTensorMixPrecision (tensor) && \
46
46
ShadeDataContext::getDilStorage (tensor).get_data_type() == dil::data_type::bf16 && \
47
47
dbl::comm::try_gen_dil_tensor (tensor).is_public_format()
48
48
@@ -2236,6 +2236,7 @@ at::Tensor AtenIpexCPUDev::dil_transpose(const at::Tensor & self, int64_t dim0,
2236
2236
at::Tensor AtenIpexCPUDev::dil_slice (const at::Tensor & self, int64_t dim, int64_t start, int64_t end, int64_t step) {
2237
2237
DEBUG (" AtenIpexCPUDev::dil_slice\n " );
2238
2238
CHECK_DNNL_OP_PRE_COND (self);
2239
+ dbl::comm::reorder_to_public (self, /* remain_dtype=*/ true );
2239
2240
2240
2241
// TODO use weight TAG to decide whether to reorder or not
2241
2242
dbl::comm::reorder_to_bf16_for_mix_prec (self, true );
@@ -2578,7 +2579,7 @@ at::Tensor AtenIpexCPUDev::dil_index(const at::Tensor & self, at::TensorList ind
2578
2579
at::Tensor AtenIpexCPUDev::dil_shuffle (const at::Tensor & self, at::IntArrayRef view_shape, int64_t dim0, int64_t dim1) {
2579
2580
DEBUG (" AtenIpexCPUDev::dil_shuffle\n " );
2580
2581
#if defined(IPEX_PROFILE_OP)
2581
- RECORD_FUNCTION (" AtenIpexCPUDev::dil_shuffle" , std::vector<c10::IValue>({self }));
2582
+ RECORD_FUNCTION (" AtenIpexCPUDev::dil_shuffle" , std::vector<c10::IValue>({}));
2582
2583
#endif
2583
2584
// NOTE: We do NOT add sanity checks here because PyTorch does not have a shuffle operator. This dil operator is for fusion and the fusion logic
2584
2585
// has more sanity checks. We found that some models use view + transpose + view to implement shuffle semantics. So IPEX will fuse these
@@ -2594,7 +2595,7 @@ at::Tensor AtenIpexCPUDev::dil_shuffle(const at::Tensor & self, at::IntArrayRef
2594
2595
std::tuple<at::Tensor,at::Tensor> AtenIpexCPUDev::dil__pack_padded_sequence (const at::Tensor & input, const at::Tensor & lengths, bool batch_first) {
2595
2596
DEBUG (" AtenIpexCPUDev::dil__pack_padded_sequence\n " );
2596
2597
#if defined(IPEX_PROFILE_OP)
2597
- RECORD_FUNCTION (" AtenIpexCPUDev::dil__pack_padded_sequence" , std::vector<c10::IValue>({input, lengths }));
2598
+ RECORD_FUNCTION (" AtenIpexCPUDev::dil__pack_padded_sequence" , std::vector<c10::IValue>({}));
2598
2599
#endif
2599
2600
torch_ipex::reset_ipex_func_status ();
2600
2601
@@ -2638,7 +2639,7 @@ at::Tensor& AtenIpexCPUDev::dil_copy_(
2638
2639
2639
2640
std::vector<at::Tensor> AtenIpexCPUDev::dil_rnn_layer (const at::Tensor& input, const at::Tensor& w1, const at::Tensor& w2,
2640
2641
const at::Tensor& w3, const at::Tensor& w4, const at::Tensor& hx, const at::Tensor& cx, bool reverse, int64_t mode,
2641
- int64_t hidden_size, int64_t num_layers, bool has_biases, bool train, bool bidirectional, at::IntArrayRef batch_sizes,
2642
+ int64_t hidden_size, int64_t num_layers, bool has_biases, bool train, bool bidirectional, at::IntArrayRef batch_sizes,
2642
2643
const std::vector<float >& scales, const std::vector<int32_t >& shift, bool quantized) {
2643
2644
DEBUG (" AtenIpexCPUDev::dil_rnn_layer\n " );
2644
2645
0 commit comments