diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 24026e310ad11..2d7a69e788fff 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -4665,6 +4665,31 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
   if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
     return nullptr;
 
+  // Avoid pushing freeze into common FMA patterns. In these cases,
+  // adding a freeze will prevent later optimizations that recognize
+  // FMA candidates like:
+  //   (fmul x, y) + z   -> fma(x, y, z)
+  //   x + (fmul y, z)   -> fma(y, z, x)
+  //   (fmul x, y) - z   -> fma(x, y, -z)
+  //   x - (fmul y, z)   -> fma(-y, z, x)
+  //
+  // which is common in performance-critical code like matrix multiplications or
+  // numerical kernels.
+  if (auto *BinOp = dyn_cast<BinaryOperator>(OrigOp)) {
+    unsigned Opcode = BinOp->getOpcode();
+    if ((Opcode == Instruction::FAdd || Opcode == Instruction::FSub) &&
+        BinOp->hasAllowContract()) {
+      // The check is the same for fadd and fsub and for either operand
+      // position: any fmul operand makes this a contraction candidate.
+      auto IsFMul = [](const Value *V) {
+        const auto *Mul = dyn_cast<BinaryOperator>(V);
+        return Mul && Mul->getOpcode() == Instruction::FMul;
+      };
+      if (IsFMul(BinOp->getOperand(0)) || IsFMul(BinOp->getOperand(1)))
+        return nullptr;
+    }
+  }
+
   // We can't push the freeze through an instruction which can itself create
   // poison.  If the only source of new poison is flags, we can simply
   // strip them (since we know the only use is the freeze and nothing can
diff --git a/llvm/test/Transforms/InstCombine/freeze.ll b/llvm/test/Transforms/InstCombine/freeze.ll
index 8875ce1c566f3..1a778aebda29b 100644
--- a/llvm/test/Transforms/InstCombine/freeze.ll
+++ b/llvm/test/Transforms/InstCombine/freeze.ll
@@ -1195,6 +1195,72 @@ define i1 @propagate_drop_flags_icmp(i32 %a, i32 %b) {
 
 declare i32 @llvm.umax.i32(i32 %a, i32 %b)
 
+define i1 @propagate_drop_fma_mul_add_left(float %arg1, float %arg2) {
+; CHECK-LABEL: @propagate_drop_fma_mul_add_left(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I:%.*]] = fmul contract float [[ARG1:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT:    [[I1:%.*]] = fadd contract float [[I]], 1.000000e+00
+; CHECK-NEXT:    [[I1_FR:%.*]] = freeze float [[I1]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[I1_FR]], 0.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+bb:
+  %i = fmul contract float %arg1, %arg2
+  %i1 = fadd contract float %i, 1.0
+  %cmp = fcmp ogt float %i1, 0.0
+  %fr = freeze i1 %cmp
+  ret i1 %fr
+}
+
+define i1 @propagate_drop_fma_add_mul_right(float %arg1, float %arg2) {
+; CHECK-LABEL: @propagate_drop_fma_add_mul_right(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I:%.*]] = fmul contract float [[ARG1:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT:    [[I1:%.*]] = fadd contract float [[I]], 1.000000e+00
+; CHECK-NEXT:    [[I1_FR:%.*]] = freeze float [[I1]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[I1_FR]], 0.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+bb:
+  %i = fmul contract float %arg1, %arg2
+  %i1 = fadd contract float 1.0, %i
+  %cmp = fcmp ogt float %i1, 0.0
+  %fr = freeze i1 %cmp
+  ret i1 %fr
+}
+
+define i1 @propagate_drop_fma_mul_sub_left(float %arg1, float %arg2) {
+; CHECK-LABEL: @propagate_drop_fma_mul_sub_left(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I:%.*]] = fmul contract float [[ARG1:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT:    [[I1:%.*]] = fadd contract float [[I]], -1.000000e+00
+; CHECK-NEXT:    [[I1_FR:%.*]] = freeze float [[I1]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[I1_FR]], 0.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+bb:
+  %i = fmul contract float %arg1, %arg2
+  %i1 = fsub contract float %i, 1.0
+  %cmp = fcmp ogt float %i1, 0.0
+  %fr = freeze i1 %cmp
+  ret i1 %fr
+}
+
+define float @propagate_drop_fma_sub_mul_right(float %arg1, float %arg2) {
+; CHECK-LABEL: @propagate_drop_fma_sub_mul_right(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I:%.*]] = fmul contract float [[ARG1:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT:    [[I1:%.*]] = fsub contract float 1.000000e+00, [[I]]
+; CHECK-NEXT:    [[FR:%.*]] = freeze float [[I1]]
+; CHECK-NEXT:    ret float [[FR]]
+;
+bb:
+  %i = fmul contract float %arg1, %arg2
+  %i1 = fsub contract float 1.0, %i
+  %fr = freeze float %i1
+  ret float %fr
+}
+
 define i32 @freeze_call_with_range_attr(i32 %a) {
 ; CHECK-LABEL: @freeze_call_with_range_attr(
 ; CHECK-NEXT:    [[Y:%.*]] = lshr i32 2047, [[A:%.*]]