Skip to content

Commit b154e6d

Browse files
committed
1. Support NHWC (channels-last) memory format
2. Remove input tensors from profiler RECORD_FUNCTION calls to reduce PyTorch profiler overhead
1 parent e920f7b commit b154e6d

File tree

7 files changed

+100
-64
lines changed

7 files changed

+100
-64
lines changed

scripts/cpu/gen-sparse-cpu-ops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ def gen_code(self):
406406
if param.core_type in ['Tensor', 'Scalar']:
407407
profiler_inputs.append(param.name)
408408
code += '#if defined(IPEX_PROFILE_OP)\n'
409-
code += ' RECORD_FUNCTION("{ns}::{name}", std::vector<c10::IValue>({{{input_names}}}));\n'.format(ns=_IPEX_OP_FUNC_NS, name=cpp_sparse_sig.def_name, input_names=', '.join(profiler_inputs))
409+
code += ' RECORD_FUNCTION("{ns}::{name}", std::vector<c10::IValue>({{{input_names}}}));\n'.format(ns=_IPEX_OP_FUNC_NS, name=cpp_sparse_sig.def_name, input_names='')
410410
code += '#endif\n'
411411

412412
code += self.gen_fallback_prepare_code(cpp_sparse_sig)

torch_ipex/csrc/aten_ipex_bridge.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ namespace bridge {
2626
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->dtype() == b.unsafeGetTensorImpl()->dtype()); \
2727
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->is_contiguous() == b.unsafeGetTensorImpl()->is_contiguous()); \
2828
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->is_contiguous(at::MemoryFormat::ChannelsLast) == b.unsafeGetTensorImpl()->is_contiguous(at::MemoryFormat::ChannelsLast)); \
29+
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->is_contiguous(at::MemoryFormat::ChannelsLast3d) == b.unsafeGetTensorImpl()->is_contiguous(at::MemoryFormat::ChannelsLast3d)); \
2930
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->is_strides_like_channels_last() == b.unsafeGetTensorImpl()->is_strides_like_channels_last()); \
3031
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->is_non_overlapping_and_dense() == b.unsafeGetTensorImpl()->is_non_overlapping_and_dense()); \
3132
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(a.unsafeGetTensorImpl()->is_wrapped_number() == b.unsafeGetTensorImpl()->is_wrapped_number()); \

torch_ipex/csrc/cpu/DevOPs.cpp

Lines changed: 48 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ std::tuple<at::Tensor,at::Tensor,at::Tensor> AtenIpexCPUDev::dil_convolution_bac
232232
at::IntArrayRef padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, std::array<bool,3> output_mask)
233233
{
234234
DEBUG("AtenIpexCPUDev::dil_convolution_backward\n");
235-
at::Tensor grad_output = grad_output_t.is_contiguous() ? grad_output_t : grad_output_t.contiguous();
235+
at::Tensor grad_output = IS_CONTIGUOUS_ANY(grad_output_t) ? grad_output_t : grad_output_t.contiguous();
236236
CHECK_DNNL_OP_PRE_COND(input);
237237
CHECK_DNNL_OP_PRE_COND(weight);
238238
dbl::comm::reorder_to_bf16_for_mix_prec(input);
@@ -429,14 +429,29 @@ at::Tensor AtenIpexCPUDev::dil_convolution_overrideable(const at::Tensor & input
429429
}
430430
if (dbl::chk::dnnl_support_the_tensors(dnnl_input_tensors)) {
431431
if (transposed) {
432-
return AtenIpexCPUDev::dil_deconvolution(input.is_contiguous() ? input : input.contiguous(), weight.is_contiguous() ? weight : weight.contiguous(), (bias.has_value() && bias.value().defined()) ? (bias.value().is_contiguous() ? bias.value() : bias.value().contiguous()) : at::Tensor(), padding, output_padding, stride, dilation, groups);
432+
return AtenIpexCPUDev::dil_deconvolution(
433+
IS_CONTIGUOUS_ANY(input) ? input : input.contiguous(),
434+
IS_CONTIGUOUS_ANY(weight) ? weight : weight.contiguous(),
435+
(bias.has_value() && bias.value().defined()) ? (IS_CONTIGUOUS_ANY(bias.value()) ? bias.value() : bias.value().contiguous()) : at::Tensor(),
436+
padding,
437+
output_padding,
438+
stride,
439+
dilation,
440+
groups);
433441
} else {
434442
// for int8 path, input always acbd format which is non-contiguous, .contiguous() will reorder to fp32
435443
auto src_dil_type = dbl::comm::try_gen_dil_tensor(input).get_data_type();
436-
auto input_temp = (src_dil_type == dil::data_type::u8 || src_dil_type == dil::data_type::s8 || input.is_contiguous()) ? input : input.contiguous();
444+
auto input_temp = (src_dil_type == dil::data_type::u8 || src_dil_type == dil::data_type::s8 || IS_CONTIGUOUS_ANY(input)) ? input : input.contiguous();
437445
auto weight_dil_type = dbl::comm::try_gen_dil_tensor(weight).get_data_type();
438-
auto weight_temp = (weight_dil_type == dil::data_type::s8 || weight.is_contiguous()) ? weight : weight.contiguous();
439-
return AtenIpexCPUDev::dil_convolution(input_temp, weight_temp, (bias.has_value() && bias.value().defined()) ? bias.value() : at::Tensor(), stride, padding, dilation, groups);
446+
auto weight_temp = (weight_dil_type == dil::data_type::s8 || IS_CONTIGUOUS_ANY(weight)) ? weight : weight.contiguous();
447+
return AtenIpexCPUDev::dil_convolution(
448+
input_temp,
449+
weight_temp,
450+
(bias.has_value() && bias.value().defined()) ? (IS_CONTIGUOUS_ANY(bias.value()) ? bias.value() : bias.value().contiguous()) : at::Tensor(),
451+
stride,
452+
padding,
453+
dilation,
454+
groups);
440455
}
441456
}
442457
}
@@ -472,9 +487,9 @@ std::tuple<at::Tensor,at::Tensor,at::Tensor> AtenIpexCPUDev::dil_convolution_bac
472487
if (dbl::chk::dnnl_support_the_tensors(dnnl_input_tensors)) {
473488
if (transposed) {
474489
return AtenIpexCPUDev::dil_deconvolution_backward(
475-
input.is_contiguous() ? input : input.contiguous(),
476-
grad_output.is_contiguous() ? grad_output : grad_output.contiguous(),
477-
weight.is_contiguous() ? weight : weight.contiguous(),
490+
IS_CONTIGUOUS_ANY(input) ? input : input.contiguous(),
491+
IS_CONTIGUOUS_ANY(grad_output) ? grad_output : grad_output.contiguous(),
492+
IS_CONTIGUOUS_ANY(weight) ? weight : weight.contiguous(),
478493
padding,
479494
output_padding,
480495
stride,
@@ -483,9 +498,9 @@ std::tuple<at::Tensor,at::Tensor,at::Tensor> AtenIpexCPUDev::dil_convolution_bac
483498
output_mask);
484499
} else {
485500
return AtenIpexCPUDev::dil_convolution_backward(
486-
input.is_contiguous() ? input : input.contiguous(),
487-
grad_output.is_contiguous() ? grad_output : grad_output.contiguous(),
488-
weight.is_contiguous() ? weight : weight.contiguous(),
501+
IS_CONTIGUOUS_ANY(input) ? input : input.contiguous(),
502+
IS_CONTIGUOUS_ANY(grad_output) ? grad_output : grad_output.contiguous(),
503+
IS_CONTIGUOUS_ANY(weight) ? weight : weight.contiguous(),
489504
padding,
490505
stride,
491506
dilation,
@@ -577,9 +592,9 @@ std::tuple<at::Tensor,at::Tensor,at::Tensor> AtenIpexCPUDev::cpu_deconvolution_b
577592

578593
std::vector<at::Tensor> g_input(groups), g_weight(groups), g_bias(groups);
579594

580-
_ipex_self = _ipex_self.is_contiguous() ? _ipex_self : _ipex_self.contiguous();
581-
_ipex_grad_output = _ipex_grad_output.is_contiguous() ? _ipex_grad_output : _ipex_grad_output.contiguous();
582-
_ipex_weight = _ipex_weight.is_contiguous() ? _ipex_weight : _ipex_weight.contiguous();
595+
_ipex_self = IS_CONTIGUOUS_ANY(_ipex_self) ? _ipex_self : _ipex_self.contiguous();
596+
_ipex_grad_output = IS_CONTIGUOUS_ANY(_ipex_grad_output) ? _ipex_grad_output : _ipex_grad_output.contiguous();
597+
_ipex_weight = IS_CONTIGUOUS_ANY(_ipex_weight) ? _ipex_weight : _ipex_weight.contiguous();
583598
for (int g = 0; g < groups; ++g) {
584599
auto _ipex_self_g = dbl::comm::subtensor(_ipex_self, 1, groups, g);
585600
auto _ipex_grad_output_g = dbl::comm::subtensor(_ipex_grad_output, 1, groups, g);
@@ -1315,7 +1330,7 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor> AtenIpexCPUDev::dil_native_batch_
13151330
CHECK_DNNL_OP_PRE_COND(weight);
13161331

13171332
IPEX_CHECK(train, "mkldnn_batch_norm_backward: currently mkldnn only support train model");
1318-
auto grad_output_contiguous = grad_output.is_contiguous() ? grad_output : grad_output.contiguous();
1333+
auto grad_output_contiguous = IS_CONTIGUOUS_ANY(grad_output) ? grad_output : grad_output.contiguous();
13191334

13201335
dbl::comm::reorder_to_bf16_for_mix_prec(grad_output_contiguous, true);
13211336
dbl::comm::reorder_to_bf16_for_mix_prec(input, true);
@@ -1395,7 +1410,7 @@ at::Tensor AtenIpexCPUDev::dil_frozen_batch_norm_backward(const at::Tensor& grad
13951410
CHECK_DNNL_OP_PRE_COND(input);
13961411
CHECK_DNNL_OP_PRE_COND(weight);
13971412

1398-
auto grad_output_contiguous = grad_output.is_contiguous() ? grad_output : grad_output.contiguous();
1413+
auto grad_output_contiguous = IS_CONTIGUOUS_ANY(grad_output) ? grad_output : grad_output.contiguous();
13991414

14001415
dbl::comm::reorder_to_bf16_for_mix_prec(grad_output_contiguous, true);
14011416
dbl::comm::reorder_to_bf16_for_mix_prec(input, true);
@@ -1483,7 +1498,7 @@ at::Tensor AtenIpexCPUDev::dil_avg_pool2d(
14831498
}
14841499

14851500
return dbl::pool::_dil_pooling(
1486-
input.is_contiguous() ? input : input.contiguous(),
1501+
IS_CONTIGUOUS_ANY(input) ? input : input.contiguous(),
14871502
kernel_size,
14881503
stride,
14891504
padding,
@@ -1509,7 +1524,7 @@ at::Tensor AtenIpexCPUDev::dil_avg_pool3d(
15091524
dbl::comm::reorder_to_bf16_for_mix_prec(input, true);
15101525

15111526
return dbl::pool::_dil_pooling(
1512-
input.is_contiguous() ? input : input.contiguous(),
1527+
IS_CONTIGUOUS_ANY(input) ? input : input.contiguous(),
15131528
kernel_size,
15141529
stride,
15151530
padding,
@@ -1592,9 +1607,9 @@ at::Tensor AtenIpexCPUDev::dil_max_pooling_backward(
15921607
dbl::comm::reorder_to_bf16_for_mix_prec(input, true);
15931608

15941609
return dbl::pool::_dil_pooling_backward(
1595-
grad_output.is_contiguous() ? grad_output : grad_output.contiguous(),
1596-
output.is_contiguous() ? output : output.contiguous(),
1597-
input.is_contiguous() ? input : input.contiguous(),
1610+
IS_CONTIGUOUS_ANY(grad_output) ? grad_output : grad_output.contiguous(),
1611+
IS_CONTIGUOUS_ANY(output) ? output : output.contiguous(),
1612+
IS_CONTIGUOUS_ANY(input) ? input : input.contiguous(),
15981613
kernel_size,
15991614
stride,
16001615
padding,
@@ -1616,14 +1631,14 @@ at::Tensor AtenIpexCPUDev::dil_avg_pool2d_backward(
16161631
CHECK_DNNL_OP_PRE_COND(grad_output);
16171632
CHECK_DNNL_OP_PRE_COND(input);
16181633

1619-
auto grad_output_contiguous = grad_output.is_contiguous() ? grad_output : grad_output.contiguous();
1634+
auto grad_output_contiguous = IS_CONTIGUOUS_ANY(grad_output) ? grad_output : grad_output.contiguous();
16201635
dbl::comm::reorder_to_bf16_for_mix_prec(grad_output_contiguous, true);
16211636
dbl::comm::reorder_to_bf16_for_mix_prec(input, true);
16221637

16231638
return dbl::pool::_dil_pooling_backward(
16241639
grad_output_contiguous,
16251640
grad_output_contiguous,
1626-
input.is_contiguous() ? input : input.contiguous(),
1641+
IS_CONTIGUOUS_ANY(input) ? input : input.contiguous(),
16271642
kernel_size,
16281643
stride,
16291644
padding,
@@ -1646,15 +1661,15 @@ at::Tensor AtenIpexCPUDev::dil_avg_pool3d_backward(
16461661
CHECK_DNNL_OP_PRE_COND(grad_output);
16471662
CHECK_DNNL_OP_PRE_COND(input);
16481663

1649-
auto grad_output_contiguous = grad_output.is_contiguous() ? grad_output : grad_output.contiguous();
1664+
auto grad_output_contiguous = IS_CONTIGUOUS_ANY(grad_output) ? grad_output : grad_output.contiguous();
16501665
dbl::comm::reorder_to_bf16_for_mix_prec(grad_output_contiguous, true);
16511666
dbl::comm::reorder_to_bf16_for_mix_prec(input, true);
16521667

16531668
std::vector<int64_t> dilation{1, 1};
16541669
return dbl::pool::_dil_pooling_backward(
16551670
grad_output_contiguous,
16561671
grad_output_contiguous,
1657-
input.is_contiguous() ? input : input.contiguous(),
1672+
IS_CONTIGUOUS_ANY(input) ? input : input.contiguous(),
16581673
kernel_size,
16591674
stride,
16601675
padding,
@@ -1696,7 +1711,7 @@ at::Tensor AtenIpexCPUDev::dil_adaptive_avg_pool2d_backward(
16961711
return dbl::pool::_dil_pooling_backward(
16971712
grad_output,
16981713
grad_output,
1699-
input.is_contiguous() ? input : input.contiguous(),
1714+
IS_CONTIGUOUS_ANY(input) ? input : input.contiguous(),
17001715
kernel_size,
17011716
/*stride*/ kernel_size,
17021717
/*padding*/ padding,
@@ -1777,7 +1792,7 @@ at::Tensor AtenIpexCPUDev::dil_threshold_backward(const at::Tensor& grad_output,
17771792
CHECK_DNNL_OP_PRE_COND(grad_output);
17781793
CHECK_DNNL_OP_PRE_COND(input);
17791794

1780-
auto grad_output_contiguous = grad_output.is_contiguous() ? grad_output : grad_output.contiguous();
1795+
auto grad_output_contiguous = IS_CONTIGUOUS_ANY(grad_output) ? grad_output : grad_output.contiguous();
17811796
dbl::comm::reorder_to_bf16_for_mix_prec(grad_output_contiguous, true);
17821797
dbl::comm::reorder_to_bf16_for_mix_prec(input, true);
17831798

@@ -1819,7 +1834,7 @@ at::Tensor AtenIpexCPUDev::dil__softmax_backward_data(
18191834
CHECK_DNNL_OP_PRE_COND(output);
18201835
CHECK_DNNL_OP_PRE_COND(self);
18211836

1822-
auto grad_output_contiguous = grad_output.is_contiguous() ? grad_output : grad_output.contiguous();
1837+
auto grad_output_contiguous = IS_CONTIGUOUS_ANY(grad_output) ? grad_output : grad_output.contiguous();
18231838
dbl::comm::reorder_to_bf16_for_mix_prec(grad_output_contiguous, true);
18241839
dbl::comm::reorder_to_bf16_for_mix_prec(output, true);
18251840
dbl::comm::reorder_to_bf16_for_mix_prec(self, true);
@@ -1861,7 +1876,7 @@ at::Tensor AtenIpexCPUDev::dil__log_softmax_backward_data(
18611876
CHECK_DNNL_OP_PRE_COND(output);
18621877
CHECK_DNNL_OP_PRE_COND(self);
18631878

1864-
auto grad_output_contiguous = grad_output.is_contiguous() ? grad_output : grad_output.contiguous();
1879+
auto grad_output_contiguous = IS_CONTIGUOUS_ANY(grad_output) ? grad_output : grad_output.contiguous();
18651880
dbl::comm::reorder_to_bf16_for_mix_prec(grad_output_contiguous, true);
18661881
dbl::comm::reorder_to_bf16_for_mix_prec(output, true);
18671882
dbl::comm::reorder_to_bf16_for_mix_prec(self, true);
@@ -1909,7 +1924,7 @@ at::Tensor AtenIpexCPUDev::dil_sigmoid_backward(
19091924
CHECK_DNNL_OP_PRE_COND(grad_output);
19101925
CHECK_DNNL_OP_PRE_COND(output);
19111926

1912-
auto grad_output_contiguous = grad_output.is_contiguous() ? grad_output : grad_output.contiguous();
1927+
auto grad_output_contiguous = IS_CONTIGUOUS_ANY(grad_output) ? grad_output : grad_output.contiguous();
19131928
dbl::comm::reorder_to_bf16_for_mix_prec(grad_output_contiguous, true);
19141929
dbl::comm::reorder_to_bf16_for_mix_prec(output, true);
19151930

@@ -1956,7 +1971,7 @@ at::Tensor AtenIpexCPUDev::dil_tanh_backward(
19561971
CHECK_DNNL_OP_PRE_COND(grad_output);
19571972
CHECK_DNNL_OP_PRE_COND(output);
19581973

1959-
auto grad_output_contiguous = grad_output.is_contiguous() ? grad_output : grad_output.contiguous();
1974+
auto grad_output_contiguous = IS_CONTIGUOUS_ANY(grad_output) ? grad_output : grad_output.contiguous();
19601975
dbl::comm::reorder_to_bf16_for_mix_prec(grad_output_contiguous, true);
19611976
dbl::comm::reorder_to_bf16_for_mix_prec(output, true);
19621977

@@ -2084,7 +2099,7 @@ at::Tensor AtenIpexCPUDev::dil_cat(at::TensorList tensors, int64_t dim) {
20842099
for (auto i = 0; i < tensors.size(); i++) {
20852100
IPEX_CHECK(!(tensors[i].dim() == 1 && tensors[i].sizes()[0] == 0),
20862101
"Currently Mkldnn cat operators do not support empty tensor.");
2087-
tensors_contiguous[i] = tensors[i].is_contiguous() ? tensors[i] : tensors[i].contiguous();
2102+
tensors_contiguous[i] = IS_CONTIGUOUS_ANY(tensors[i]) ? tensors[i] : tensors[i].contiguous();
20882103

20892104
dbl::comm::reorder_to_bf16_for_mix_prec(tensors_contiguous[i], true);
20902105

@@ -2448,7 +2463,7 @@ at::Tensor AtenIpexCPUDev::dil_gelu_backward(const at::Tensor& grad_output, cons
24482463

24492464
dbl::comm::reorder_to_bf16_for_mix_prec(input, true);
24502465

2451-
auto grad_output_contiguous = grad_output.is_contiguous() ? grad_output : grad_output.contiguous();
2466+
auto grad_output_contiguous = IS_CONTIGUOUS_ANY(grad_output) ? grad_output : grad_output.contiguous();
24522467
dbl::comm::reorder_to_bf16_for_mix_prec(grad_output_contiguous, true);
24532468

24542469
dil::tensor x = dbl::comm::try_gen_dil_tensor(input);

torch_ipex/csrc/cpu/ExtendOPs.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ void AtenIpexTypeExt::packed_add_(at::Tensor &top_half, at::Tensor &bot_half,
3636
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(bot_half.is_contiguous());
3737

3838
#if defined(IPEX_PROFILE_OP)
39-
RECORD_FUNCTION("packed_add_", std::vector<c10::IValue>({top_half, bot_half, grad, alpha}));
39+
RECORD_FUNCTION("packed_add_", std::vector<c10::IValue>({}));
4040
#endif
4141

4242
if (grad.is_sparse()) {
@@ -238,7 +238,7 @@ static inline void mm_backward(at::BFloat16 *out, const at::BFloat16 *in1,
238238
template <typename T>
239239
inline at::Tensor _interaction_forward(const std::vector<at::Tensor> &input) {
240240
#if defined(IPEX_PROFILE_OP)
241-
RECORD_FUNCTION("_interaction_forward", std::vector<c10::IValue>({input}));
241+
RECORD_FUNCTION("_interaction_forward", std::vector<c10::IValue>({}));
242242
#endif
243243
uint32_t total_feature_size = 0;
244244
int64_t batch_size = input[0].sizes()[0];
@@ -291,7 +291,7 @@ _interaction_backward(const at::Tensor &grad_out,
291291
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad_out.is_contiguous());
292292
#if defined(IPEX_PROFILE_OP)
293293
RECORD_FUNCTION("_interaction_backward",
294-
std::vector<c10::IValue>({grad_out, input}));
294+
std::vector<c10::IValue>({}));
295295
#endif
296296
uint32_t total_feature_size = 0;
297297
int64_t batch_size = input[0].sizes()[0];
@@ -469,7 +469,7 @@ std::vector<at::Tensor> rnn_layer(const at::Tensor& input,
469469
int64_t hidden_size, int64_t num_layers, bool train,
470470
bool bidirectional, at::IntArrayRef batch_sizes,
471471
const std::vector<float>& scales,
472-
const std::vector<int32_t>& shift,
472+
const std::vector<int32_t>& shift,
473473
bool quantized) {
474474
TORCH_CHECK(weights.size() == 2 || weights.size() == 4);
475475
if (weights.size() == 4) {
@@ -523,7 +523,7 @@ std::vector<at::Tensor> rnn(
523523
// no need to do calibration for the output in lstm, will use the scale & zero point of the input
524524
// to dequantize the output from u8 to f32, need to add an "output" here but actually unused
525525
// For LSTM, we only need to calibrate the input to the first layer
526-
// TODO: add int8 for gru and rnn.
526+
// TODO: add int8 for gru and rnn.
527527
if (check_auto_mix_int8_fp32() && check_int8_calibration() && static_cast<dil::rnn_kind>(mode) == dil::rnn_kind::LSTM) {
528528
int64_t num_ops_id = Int8OptConfig::fetch_and_add_ops_id();
529529
insert_or_updata_observer({input}, {input}, "lstm", num_ops_id, /*asymmetric*/true);
@@ -625,7 +625,7 @@ std::vector<at::Tensor> AtenIpexTypeExt::gru(
625625
at::Tensor AtenIpexTypeExt::linear_relu(const at::Tensor &input,
626626
const at::Tensor &weight,
627627
const c10::optional<at::Tensor> &bias) {
628-
if (bias.has_value())
628+
if (bias.has_value())
629629
return cpu::AtenIpexJITDev::dil_linear_fuse_eltwise(input, weight, bias.value(), dil::attr_t::fuse_relu());
630630
return cpu::AtenIpexJITDev::dil_linear_fuse_eltwise(input, weight, at::Tensor(), dil::attr_t::fuse_relu());
631631
}

0 commit comments

Comments
 (0)