3
3
#include " CustomOPs.h"
4
4
#include " DevOPs.h"
5
5
#include " FusionOPs.h"
6
+ #include " dbl/Common.h"
6
7
#include " aten/aten.hpp"
7
8
#include " bf16/vec/bf16_vec_kernel.h"
8
9
#include " dil/dil.hpp"
10
+ #include " torch_ipex/csrc/cpu/int8/Config.h"
9
11
#include " xsmm/libxsmm_utils.h"
10
12
#include < ATen/Parallel.h>
11
13
#include < ATen/MatrixRef.h>
@@ -465,16 +467,19 @@ std::vector<at::Tensor> rnn_layer(const at::Tensor& input,
465
467
at::TensorList weights, const at::Tensor& hx,
466
468
const at::Tensor& cx, bool reverse, int64_t mode,
467
469
int64_t hidden_size, int64_t num_layers, bool train,
468
- bool bidirectional, at::IntArrayRef batch_sizes) {
470
+ bool bidirectional, at::IntArrayRef batch_sizes,
471
+ const std::vector<float >& scales,
472
+ const std::vector<int32_t >& shift,
473
+ bool quantized) {
469
474
TORCH_CHECK (weights.size () == 2 || weights.size () == 4 );
470
475
if (weights.size () == 4 ) {
471
476
if (at::GradMode::is_enabled ())
472
477
return NewRNNLayerOp::apply (input, weights[0 ], weights[1 ], weights[2 ], weights[3 ], hx, cx, reverse, mode, hidden_size, num_layers, true , train, bidirectional, batch_sizes);
473
- return NewRNNLayerOp::_forward (input, weights[0 ], weights[1 ], weights[2 ], weights[3 ], hx, cx, reverse, mode, hidden_size, num_layers, true , train, bidirectional, batch_sizes);
478
+ return NewRNNLayerOp::_forward (input, weights[0 ], weights[1 ], weights[2 ], weights[3 ], hx, cx, reverse, mode, hidden_size, num_layers, true , train, bidirectional, batch_sizes, scales, shift, quantized );
474
479
} else {
475
480
if (at::GradMode::is_enabled ())
476
481
return NewRNNLayerOp::apply (input, weights[0 ], weights[1 ], at::zeros (weights[0 ].sizes (), weights[0 ].options ()), at::zeros (weights[1 ].sizes (), weights[1 ].options ()), hx, cx, reverse, mode, hidden_size, num_layers, false , train, bidirectional, batch_sizes);
477
- return NewRNNLayerOp::_forward (input, weights[0 ], weights[1 ], at::zeros (weights[0 ].sizes (), weights[0 ].options ()), at::zeros (weights[1 ].sizes (), weights[1 ].options ()), hx, cx, reverse, mode, hidden_size, num_layers, false , train, bidirectional, batch_sizes);
482
+ return NewRNNLayerOp::_forward (input, weights[0 ], weights[1 ], at::zeros (weights[0 ].sizes (), weights[0 ].options ()), at::zeros (weights[1 ].sizes (), weights[1 ].options ()), hx, cx, reverse, mode, hidden_size, num_layers, false , train, bidirectional, batch_sizes, scales, shift, quantized );
478
483
}
479
484
}
480
485
// MKLDNN RNN integration notes:
@@ -514,6 +519,27 @@ std::vector<at::Tensor> rnn(
514
519
at::MatrixRef<at::Tensor> weights{weight, static_cast <size_t >(weight_stride0)};
515
520
516
521
auto num_directions = bidirectional ? 2 : 1 ;
522
+
523
+ // No calibration is needed for the LSTM output: the scale & zero point of the input are used
524
+ // to dequantize the output from u8 to f32. An "output" still has to be supplied here, but it is actually unused.
525
+ // For LSTM, we only need to calibrate the input to the first layer
526
+ // TODO: add int8 for gru and rnn.
527
+ if (check_auto_mix_int8_fp32 () && check_int8_calibration () && static_cast <dil::rnn_kind>(mode) == dil::rnn_kind::LSTM) {
528
+ int64_t num_ops_id = Int8OptConfig::fetch_and_add_ops_id ();
529
+ insert_or_updata_observer ({input}, {input}, " lstm" , num_ops_id, /* asymmetric*/ true );
530
+ }
531
+
532
+ bool quantized = false ;
533
+ std::vector<std::vector<float >> scales = {};
534
+ std::vector<std::vector<int32_t >> shift = {};
535
+ if (check_auto_mix_int8_fp32 () && !check_int8_calibration () && static_cast <dil::rnn_kind>(mode) == dil::rnn_kind::LSTM) {
536
+ int64_t num_ops_id = Int8OptConfig::fetch_and_add_ops_id ();
537
+ quantized = torch_ipex::cpu::dbl::comm::get_int8_quantized_status (num_ops_id);
538
+ std::tie (scales, shift) = torch_ipex::cpu::dbl::comm::get_int8_asymmetric (num_ops_id);
539
+ IPEX_CHECK (scales.size () > 0 , " incorrect scale size" );
540
+ IPEX_CHECK (shift.size () > 0 , " incorrect shift size" );
541
+ }
542
+
517
543
auto layer_input = input;
518
544
std::vector<at::Tensor> layer_output (num_directions);
519
545
std::vector<at::Tensor> layer_hy (num_layers * num_directions);
@@ -525,7 +551,7 @@ std::vector<at::Tensor> rnn(
525
551
auto layer_hx = hx[index];
526
552
auto layer_cx = cx[index];
527
553
auto reverse = (direction > 0 );
528
- auto outputs = rnn_layer (layer_input, layer_weights, layer_hx, layer_cx, reverse, mode, hidden_size, num_layers, train, bidirectional, batch_sizes);
554
+ auto outputs = rnn_layer (layer_input, layer_weights, layer_hx, layer_cx, reverse, mode, hidden_size, num_layers, train, bidirectional, batch_sizes, scales[ 0 ], shift[ 0 ], quantized );
529
555
layer_output[direction] = outputs[0 ];
530
556
layer_hy[index] = outputs[1 ];
531
557
layer_cy[index] = outputs[2 ];
0 commit comments