 import torch
 import _torch_ipex as core
 
-
 qscheme_dict ={torch.per_tensor_affine:0,
                torch.per_channel_affine:1,
                torch.per_tensor_symmetric:2,
                torch.per_channel_symmetric:3,
                torch.torch.per_channel_affine_float_qparams:4}
 
-class AmpConf(object):
-    def __init__(self, mixed_dtype=torch.bfloat16, configure_file=None, qscheme=torch.per_tensor_affine):
-        self.dtype = mixed_dtype
+class QuantConf(object):
+    def __init__(self, configure_file=None, qscheme=torch.per_tensor_affine):
         self.configure_file = configure_file
 
-        if self.dtype == torch.int8:
-            core.clear_indicators()
-            assert qscheme in [torch.per_tensor_affine, torch.per_tensor_symmetric], \
-                "qscheme is only support torch.per_tensor_affine and torch.per_tensor_symmetric now"
-            core.set_int8_qscheme(qscheme_dict[qscheme])
+        core.clear_indicators()
+        assert qscheme in [torch.per_tensor_affine, torch.per_tensor_symmetric], \
+            "only torch.per_tensor_affine and torch.per_tensor_symmetric are supported for now"
+        core.set_int8_qscheme(qscheme_dict[qscheme])
 
-        # for int8 path, if user give a exited configure file, load it.
-        if self.configure_file != None and self.dtype == torch.int8:
+        # if the user provides an existing configuration file, load it
+        if self.configure_file is not None:
             if os.path.exists(self.configure_file) and os.stat(self.configure_file).st_size != 0:
                 with open(self.configure_file, 'r') as f:
                     configures = json.load(f)
                     core.load_indicators_file(configures)
             else:
                 assert False, 'Cannot load an empty or nonexistent file, please run the calibration step first'
 
-    # for int8 quantization, will save the date after doing calibration step.
-    def save(self, configure_file, default_recipe=True):
-        core.add_indicators()
+    def save(self, configure_file):
         configures = core.get_int8_configures()
-        if default_recipe:
-            configures = self.get_default_recipe(configures)
         with open(configure_file, 'w') as fp:
             json.dump(configures, fp, indent = 4)
-
-    def get_default_recipe(self, configures):
-        elt_wise = ['relu', 'sigmoid', 'gelu']
-        inplace_ops = ['relu_', 'add_']
-        shape_ops = ['flatten']
-        # get default recipe,
-        # q+dq+conv+q+dq+relu => q+dq+conv+relu
-        # q+dq+op1+q+dq+q+dq+op2+q+dq => q+dq+op1+q+dq+op2+q+dq
-        default_configures = configures
-        num_ops = len(default_configures)
-        for cur_id in range(num_ops):
-            cur_op = default_configures[cur_id]['name']
-            if cur_op == 'dropout':
-                continue
-            inputs = default_configures[cur_id]['inputs_flow']
-            num_input = len(inputs)
-            pre_ops = {}
-            for i_num in range(num_input):
-                inp = inputs[i_num]
-                for pre_id in range(cur_id):
-                    pre_op = default_configures[pre_id]['name']
-                    pre_out = default_configures[pre_id]['outputs_flow']
-                    num_out= len(pre_out)
-                    for o_num in range(num_out):
-                        # pre_op+qu+dequ+qu+dequ+cur_op+qu+dequ -> pre_op+qu+dequ+cur_op+qu+dequ.
-                        # for relu, sigmoid or other elt_wise ops, id pre_op is conv, linear, then
-                        # remove qu+dequ between them for fusion: pre_op+cur_op+qu_dequ.
-                        if pre_out[o_num] == inp:
-                            if (cur_op not in inplace_ops) \
-                                or (cur_op in inplace_ops and \
-                                    (pre_op == 'conv2d' or pre_op == 'conv3d' or pre_op == 'linear')):
-                                if pre_op not in inplace_ops and pre_op != 'dropout':
-                                    default_configures[pre_id]['outputs_quantized'][o_num] = False
-                            if cur_op in elt_wise \
-                                and (pre_op == 'conv2d' or pre_op == 'conv3d' or pre_op == 'linear' or pre_op == 'add'):
-                                default_configures[cur_id]['inputs_quantized'][i_num] = False
-                            if cur_op == 'add':
-                                pre_ops[i_num] = pre_op
-                            if cur_op in shape_ops:
-                                # for pooling case, the input and output always has same scale and zero point,
-                                # if the pooling's post ops is flatten, need sync flatten's input and output's
-                                # scale and zero point to pooling.
-                                if pre_op in ['max_pool2d', 'adaptive_avg_pool2d']:
-                                    default_configures[cur_id]['input_scales'][i_num] = default_configures[pre_id]['output_scales'][o_num]
-                                    default_configures[cur_id]['input_zero_points'][i_num] = default_configures[pre_id]['output_zero_points'][o_num]
-                                    default_configures[cur_id]['output_scales'][i_num] = default_configures[pre_id]['output_scales'][o_num]
-                                    default_configures[cur_id]['output_zero_points'][i_num] = default_configures[pre_id]['output_zero_points'][o_num]
-                            if pre_op in shape_ops:
-                                # if pre op is flatten, sync the input's scale and zero point to flatten.
-                                default_configures[cur_id]['input_scales'][i_num] = default_configures[pre_id]['output_scales'][o_num]
-                                default_configures[cur_id]['input_zero_points'][i_num] = default_configures[pre_id]['output_zero_points'][o_num]
-            # conv   op           conv   op
-            #    \   /               \   /
-            #     q   q               \   q
-            #      \ /        =>       \ /
-            #      dq  dq               \  dq
-            #        \  /                \  /
-            #         add                 add
-            if len(pre_ops) > 0:
-                for key, value in pre_ops.items():
-                    if value == 'conv2d' or value == 'conv3d' or value == 'linear':
-                        default_configures[cur_id]['inputs_quantized'][key] = False
-                        break
-
-            # if add pre_op hasn't conv and linear, not need add q, dq for accuracy.
-            pre_inputs = pre_ops.values()
-            if cur_op == 'add' and \
-                ('conv2d' not in pre_inputs and 'conv3d' not in pre_inputs and 'linear' not in pre_inputs):
-                default_configures[cur_id]['inputs_quantized'][0] = False
-                default_configures[cur_id]['inputs_quantized'][1] = False
-
-        # post process for add, linear, if cur op hasn't post quantized op, i.e. 'outputs_quantized' is True,
-        # for good perfromance, the default recipe:
-        # int8_input -> op -> q -> dq will converted to int8_input -> op.
-        ops_remove_q_dq_after = ['add', 'linear', 'conv2d']
-        # post process for flatten, if flatten's pre-pop and post op are fp32 op, don't need add q and dq
-        # before and after it.
-        ops_remove_q_dq_before_after = ['flatten']
-        for cur_id in range(num_ops):
-            cur_op = default_configures[cur_id]['name']
-            if cur_op in ops_remove_q_dq_after and default_configures[cur_id]['outputs_quantized'][0]:
-                default_configures[cur_id]['outputs_quantized'][0] = False
-            if cur_op in ops_remove_q_dq_before_after and default_configures[cur_id]['inputs_quantized'][0] \
-                and default_configures[cur_id]['outputs_quantized'][0]:
-                default_configures[cur_id]['inputs_quantized'][0] = False
-                default_configures[cur_id]['outputs_quantized'][0] = False
-
-        return default_configures
+        # clear the indicators after saving
+        core.clear_indicators()
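For context, here is a minimal sketch of how the renamed QuantConf class could be driven end to end, assuming the usual calibrate-save-reload flow. Only QuantConf, its configure_file/qscheme arguments, and save() come from this diff; the intel_pytorch_extension import name and the calibration step are assumptions for illustration.

    import torch
    import intel_pytorch_extension as ipex  # package name assumed, not shown in this diff

    # calibration run: let the backend collect int8 indicators (scales/zero points)
    # while representative batches execute, then persist them as JSON
    conf = ipex.QuantConf(qscheme=torch.per_tensor_symmetric)
    # ... run a few calibration batches here under whatever calibration mode
    #     the library provides (not part of this diff) ...
    conf.save("int8_configure.json")

    # inference run: reload the saved indicators from the JSON file
    conf = ipex.QuantConf(configure_file="int8_configure.json")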