Commit 688dadc

[LLGA] map div to llga and change scalar input to tensor (#192)
1 parent 709111a

File tree

4 files changed: +85 -8 lines changed

tests/cpu/test_jit_llga_quantization_fuser.py

Lines changed: 71 additions & 0 deletions
@@ -400,6 +400,77 @@ def forward(self, x):
         self.assertFused(graph, ['aten::_convolution', 'aten::relu', 'aten::quantize_per_channel'])
         self.checkPatterns(graph, patterns)
 
+    @llga_test_env
+    def test_bmm_div_scalar(self):
+        class M(nn.Module):
+            def __init__(self, div_value):
+                super(M, self).__init__()
+                self.div_value = div_value
+
+            def forward(self, x, y):
+                mm_res = torch.matmul(x, y)
+                return mm_res.div(self.div_value)
+
+        x = torch.randn(128, 16, 384, 64)
+        y = torch.randn(128, 1, 64, 384)
+        patterns = [
+            ["aten::dequantize", "aten::matmul", "aten::div"],
+        ]
+        m = M(8.)
+        graph = self.checkQuantizeTrace(m, [x, y], atol=2e-1, config_name="bmm_div_scalar", qscheme=torch.per_tensor_affine)
+        # TODO: enable the below check when matmul-div fusion is supported in the backend
+        # self.assertGraphContainsExactly(graph, LLGA_FUSION_GROUP, 1)
+        # self.assertFused(graph, ['aten::matmul', 'aten::div'])
+        # self.checkPatterns(graph, patterns)
+
+    @llga_test_env
+    def test_bmm_div_identity(self):
+        class M(nn.Module):
+            def __init__(self, div_value):
+                super(M, self).__init__()
+                self.div_value = div_value
+
+            def forward(self, x, y):
+                mm_res = torch.matmul(x, y)
+                return mm_res.div(self.div_value)
+
+        x = torch.randn(128, 16, 384, 64)
+        y = torch.randn(128, 1, 64, 384)
+        patterns = [
+            ["aten::dequantize", "aten::matmul"],
+        ]
+        m = M(1.)
+        graph = self.checkQuantizeTrace(m, [x, y], atol=2e-1, config_name="bmm_div_identity", qscheme=torch.per_tensor_affine)
+        # divide by 1 should be removed by Constant Propagation
+        self.assertGraphContainsExactly(graph, "aten::div", 0, consider_subgraphs=True)
+        self.assertGraphContainsExactly(graph, LLGA_FUSION_GROUP, 1)
+        self.assertFused(graph, ['aten::matmul'])
+        # TODO: enable this check when int8 matmul is supported in the backend
+        # self.checkPatterns(graph, patterns)
+
+    @llga_test_env
+    def test_bmm_div_tensor(self):
+        class M(nn.Module):
+            def __init__(self):
+                super(M, self).__init__()
+
+            def forward(self, x, y, z):
+                mm_res = torch.matmul(x, y)
+                return mm_res.div(z)
+
+        x = torch.randn(128, 16, 384, 64)
+        y = torch.randn(128, 1, 64, 384)
+        patterns = [
+            ["aten::dequantize", "aten::matmul", "aten::div"],
+        ]
+        for z in [torch.randn(384), torch.randn(128, 16, 384, 384)]:
+            m = M()
+            graph = self.checkQuantizeTrace(m, [x, y, z], atol=2e-1, config_name="bmm_div_tensor", qscheme=torch.per_tensor_affine)
+            # TODO: enable the below check when matmul-div fusion is supported in the backend
+            # self.assertGraphContainsExactly(graph, LLGA_FUSION_GROUP, 1)
+            # self.assertFused(graph, ['aten::matmul', 'aten::div'])
+            # self.checkPatterns(graph, patterns)
+
 class TestShapeFallback(JitLlgaTestCase):
     @unittest.skipIf(True, 'Size peephole optimization not enabled yet')
     @llga_test_env
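
For orientation, the matmul-then-div chain these tests quantize can be reproduced with stock PyTorch tracing alone. A minimal sketch (nothing IPEX-specific assumed; shapes shrunk for speed):

import torch
import torch.nn as nn

class BmmDiv(nn.Module):
    def forward(self, x, y):
        # Batched matmul followed by div, mirroring the test modules above.
        return torch.matmul(x, y).div(8.)

x = torch.randn(2, 4, 16, 8)
y = torch.randn(2, 1, 8, 16)
traced = torch.jit.trace(BmmDiv(), (x, y))

# The traced IR contains aten::matmul followed by aten::div -- the chain
# the LLGA fuser can target once aten::div is mapped to the backend.
kinds = [n.kind() for n in traced.graph.nodes()]
assert "aten::matmul" in kinds and "aten::div" in kinds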

torch_ipex/csrc/jit/codegen/onednn/graph_helper.cpp

Lines changed: 2 additions & 0 deletions
@@ -159,6 +159,8 @@ Operator createOperator(Node* node) {
         .setAttr("keep_stats", false);
   } else if (node->kind() == Symbol::aten("add")) {
     return makeBinaryOp(node, opkind::Add);
+  } else if (node->kind() == Symbol::aten("div")) {
+    return makeBinaryOp(node, opkind::Divide);
   } else if (node->kind() == Symbol::aten("tanh")) {
     return makeEltwiseOp(node, opkind::Tanh);
   } else if (node->kind() == Symbol::aten("relu")) {
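
The dispatch above keys purely on the TorchScript node kind. A small stock-PyTorch sketch showing that tensor division surfaces in the scripted IR as aten::div, the kind that now maps to opkind::Divide:

import torch

@torch.jit.script
def f(x: torch.Tensor, y: torch.Tensor):
    return x / y

# The C++ check node->kind() == Symbol::aten("div") corresponds to the
# "aten::div" node kind visible from Python in the scripted graph.
assert any(n.kind() == "aten::div" for n in f.graph.nodes())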

torch_ipex/csrc/jit/codegen/onednn/prepare_binary.cpp

Lines changed: 8 additions & 6 deletions
@@ -38,7 +38,8 @@ static void ConvertScalarToTensor(Block* block) {
       ConvertScalarToTensor(sub);
     }
 
-    if (node->kind() == aten::add || node->kind() == aten::mul) {
+    if (node->kind() == aten::add || node->kind() == aten::mul ||
+        node->kind() == aten::div) {
       mayConvertScalarInputToTensor(node);
     }
   }
@@ -71,24 +72,25 @@ static void DecomposeFusedAdd(Block* block) {
   }
 }
 
-static void EliminateIdentityMulAdd(Block* block) {
+static void EliminateIdentityMulAddDiv(Block* block) {
   for (auto node : block->nodes()) {
     for (auto sub : node->blocks()) {
-      EliminateIdentityMulAdd(sub);
+      EliminateIdentityMulAddDiv(sub);
     }
 
     if ((node->kind() == aten::add && compareConstValue(node->input(1), 0.0)) ||
-        (node->kind() == aten::mul && compareConstValue(node->input(1), 1.0))) {
+        (node->kind() == aten::mul && compareConstValue(node->input(1), 1.0)) ||
+        (node->kind() == aten::div && compareConstValue(node->input(1), 1.0))) {
       node->output()->replaceAllUsesWith(node->input(0));
     }
   }
 }
 
 void PrepareBinaryForLLGA(const std::shared_ptr<Graph>& graph) {
   DecomposeFusedAdd(graph->block());
-  EliminateIdentityMulAdd(graph->block());
+  EliminateIdentityMulAddDiv(graph->block());
   EliminateDeadCode(graph);
-  // ConvertScalarToTensor must be placed after EliminateIdentityMulAdd
+  // ConvertScalarToTensor must be placed after EliminateIdentityMulAddDiv
   ConvertScalarToTensor(graph->block());
   // TODO: after conv-bn folding, bias will become bias? (Optional) after this pass
   // and will lose it when using mustNotBeNone to check Optional Bias
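
Both rewrites in this pass rest on plain tensor semantics: a scalar operand and a dimension-[1] Float tensor broadcast identically, and dividing by 1 is an identity. A minimal sketch of the two invariants in stock PyTorch (no IPEX required):

import torch

x = torch.randn(4, 3)

# Invariant behind ConvertScalarToTensor: a scalar operand and a
# dimension-[1] Float tensor broadcast to the same result.
assert torch.equal(x.div(8.), x.div(torch.tensor([8.])))

# Invariant behind EliminateIdentityMulAddDiv: adding 0, multiplying by 1,
# and dividing by 1 leave the tensor unchanged, so the node's output can
# safely be replaced with its first input.
for same in (x + 0.0, x * 1.0, x / 1.0):
    assert torch.equal(same, x)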

torch_ipex/csrc/jit/codegen/onednn/prepare_binary.h

Lines changed: 4 additions & 2 deletions
@@ -11,12 +11,14 @@ namespace onednn {
 //
 // The pass does the following:
 //
-// - (1). Convert scalar input of aten::add and aten::mul into Float tensor with
+// - (1). Convert scalar input of aten::add, aten::mul and aten::div into Float
+//        tensor with
 //        dimension [1]
 //
 // - (2). Decompose fused add into aten::mul + aten::add when alpha != 1.0
 //
-// - (3). Eliminate identity add/mul, i.e., tensor + 0, tensor * 1
+// - (3). Eliminate identity add/mul/div, i.e., tensor + 0, tensor * 1,
+//        tensor / 1
 //
 // (1) and (2) are in the purpose of aligning with the OP spec of LLGA.
 // (3) is an optimization pass to remove the redundant calculation
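
Step (2) is untouched by this commit, but for completeness it rests on the identity add(a, b, alpha) = a + alpha * b. A minimal stock-PyTorch sketch of that decomposition:

import torch

a = torch.randn(5)
b = torch.randn(5)

# Step (2): fused add with alpha != 1.0 is equivalent to mul followed by
# add, i.e. torch.add(a, b, alpha) == a + alpha * b.
alpha = 0.5
assert torch.allclose(torch.add(a, b, alpha=alpha), a + alpha * b)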
