Skip to content

Commit 31ffb6a

Browse files
record gelu data flow for int8 fusion path (#16)
1 parent 97d4606 commit 31ffb6a

File tree

6 files changed

+39
-3
lines changed

6 files changed

+39
-3
lines changed

intel_pytorch_extension_py/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def save(self, configure_file, default_recipe=True):
2929
json.dump(configures, fp, indent = 4)
3030

3131
def get_default_recipe(self, configures):
32-
elt_wise = ['relu', 'sigmoid']
32+
elt_wise = ['relu', 'sigmoid', 'gelu']
3333
inplace_ops = ['relu_', 'add_']
3434
# get default recipe,
3535
# q+dq+conv+q+dq+relu => q+dq+conv+relu

torch_ipex/csrc/autocast_kernel.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,5 +215,15 @@ at::Tensor dropout(const at::Tensor& input, double p, bool train) {
215215
return at::dropout(input, p, train);
216216
}
217217

218+
// Autocast handler for gelu. With the AutocastCPU dispatch key excluded for
// the duration of the call, the input is routed to int8::gelu when the active
// autocast dtype is Char; for any other dtype it is cast to fp32 and handed
// to at::gelu.
at::Tensor gelu(const at::Tensor& input) {
  // Keep AutocastCPU out of the dispatch set for the calls made below.
  c10::impl::ExcludeDispatchKeyGuard autocast_off(DispatchKey::AutocastCPU);
  const bool wants_int8 = (get_autocast_dtype() == at::ScalarType::Char);
  return wants_int8 ? int8::gelu(input)
                    : at::gelu(cpu_cached_cast(at::kFloat, input));
}
227+
218228
} // autocast
219229
} // torch_ipex

torch_ipex/csrc/autocast_kernel.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,5 +47,7 @@ at::Tensor add_tensor(const at::Tensor& input, const at::Tensor& other, const at
4747

4848
at::Tensor dropout(const at::Tensor& input, double p, bool train);
4949

50+
// Autocast handler for gelu: forwards to int8::gelu when the autocast dtype
// is Char, otherwise runs at::gelu on the input cast to fp32.
at::Tensor gelu(const at::Tensor& input);
51+
5052
} // autocast
5153
} // torch_ipex

torch_ipex/csrc/autocast_mode.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,6 @@ MAKE_REGISTER_FUNC(ADD_NS(addbmm), "addbmm", Tensor (const Tensor &, const Tenso
181181
MAKE_REGISTER_FUNC(ADD_NS(convolution), "convolution", Tensor (const Tensor &, const Tensor &, const c10::optional<Tensor>&, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t), fp32)
182182
MAKE_REGISTER_FUNC(ADD_NS(avg_pool2d), "avg_pool2d", Tensor (const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, bool, bool, c10::optional<int64_t>), fp32)
183183
MAKE_REGISTER_FUNC(ADD_NS(avg_pool3d), "avg_pool3d", Tensor (const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, bool, bool, c10::optional<int64_t>), fp32)
184-
MAKE_REGISTER_FUNC(ADD_NS(gelu), "gelu", Tensor (const Tensor &), fp32)
185184
MAKE_REGISTER_FUNC(ADD_NS(upsample_nearest1d), "upsample_nearest1d", Tensor (const Tensor &, IntArrayRef, c10::optional<double>), fp32)
186185
MAKE_REGISTER_FUNC(ADD_NS(upsample_nearest1d), "upsample_nearest1d.vec", Tensor (const Tensor &, c10::optional<IntArrayRef>, c10::optional<ArrayRef<double>>), fp32)
187186
MAKE_REGISTER_FUNC(ADD_NS(upsample_nearest2d), "upsample_nearest2d", Tensor (const Tensor &, IntArrayRef, c10::optional<double>, c10::optional<double>), fp32)
@@ -250,7 +249,7 @@ TORCH_LIBRARY_IMPL(aten, AutocastCPU, m){
250249
m.impl(TORCH_SELECTIVE_NAME("aten::add_.Tensor"), TORCH_FN((&torch_ipex::autocast::add_tensor_)));
251250
m.impl(TORCH_SELECTIVE_NAME("aten::add.Tensor"), TORCH_FN((&torch_ipex::autocast::add_tensor)));
252251
m.impl(TORCH_SELECTIVE_NAME("aten::dropout"), TORCH_FN((&torch_ipex::autocast::dropout)));
253-
252+
m.impl(TORCH_SELECTIVE_NAME("aten::gelu"), TORCH_FN((&torch_ipex::autocast::gelu)));
254253
}
255254

256255
} // namespace autocast

torch_ipex/csrc/quantization/AutoCast.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,29 @@ at::Tensor dropout(const at::Tensor &input, double p, bool train) {
637637
return at::dropout(input, p, train);
638638
}
639639

640+
// int8 handler for gelu.
//
// Every call draws one op id from Int8OptConfig. While int8 calibration is
// active, the op is executed with at::gelu, its input/output names are
// recorded in tensors_flow, and an observer entry is inserted or updated for
// the op. When calibration is not active, the op simply runs at::gelu on the
// input, so every control path returns a tensor.
//
// @param input  tensor passed to at::gelu.
// @return       result of at::gelu(input).
at::Tensor gelu(const at::Tensor &input) {
  auto op_id = torch_ipex::Int8OptConfig::fetch_and_add_ops_id();
  if (check_int8_calibration()) {
    auto it = tensors_flow.find(input.unsafeGetTensorImpl());
    std::vector<std::string> op_inputs, op_outputs;
    if (it == tensors_flow.end()) {
      // Input has no recorded producer: give it a name derived from this op.
      std::string op_input = "gelu." + std::to_string(op_id) + ".input";
      op_inputs.push_back(op_input);
    } else {
      // Reuse the name stored for this tensor in tensors_flow.
      op_inputs.push_back(std::get<1>(it->second));
    }

    auto output = at::gelu(input);
    std::string op_output = "gelu." + std::to_string(op_id) + ".output";
    op_outputs.push_back(op_output);
    // Register the output so later ops can look up its name by TensorImpl.
    tensors_flow.emplace(output.unsafeGetTensorImpl(),
                         val_name{weakref_scales(output.getIntrusivePtr()), op_output});
    torch_ipex::insert_or_updata_observer({input}, {output}, "gelu",
                                          op_id, op_inputs, op_outputs);
    return output;
  }
  // Not calibrating: a value must still be returned. Flowing off the end of
  // a non-void function is undefined behaviour, so fall back to plain gelu.
  return at::gelu(input);
}
662+
640663
} // namespace autocast
641664
} // namespace cpu
642665
} // namespace torch_ipex

torch_ipex/csrc/quantization/AutoCast.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ at::Tensor add_tensor(const at::Tensor& input, const at::Tensor& other, const at
5050

5151
at::Tensor dropout(const at::Tensor &input, double p, bool train);
5252

53+
// int8 handler for gelu: while int8 calibration is active it runs at::gelu,
// records the op's input/output names and updates its observer entry.
at::Tensor gelu(const at::Tensor& input);
54+
5355
} // namespace int8
5456
} // namespace autocast
5557
} // namespace torch_ipex

0 commit comments

Comments
 (0)