Skip to content

[DAGCombiner] Allow freeze(fmul) + fadd/fsub → FMA when using contract #142250

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16736,6 +16736,29 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
}

// fold (fadd (freeze (fmul x, y)), z) -> (fma x, y, z)
bool CanContract =
(Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
(Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros());
if (CanContract && N0.getOpcode() == ISD::FREEZE) {
SDValue FrozenMul = N0.getOperand(0);
if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
SDValue X = FrozenMul.getOperand(0);
SDValue Y = FrozenMul.getOperand(1);
return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N1);
}
}

// fold (fadd x, (freeze (fmul y, z))) -> (fma y, z, x)
if (CanContract && N1.getOpcode() == ISD::FREEZE) {
SDValue FrozenMul = N1.getOperand(0);
if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
SDValue X = FrozenMul.getOperand(0);
SDValue Y = FrozenMul.getOperand(1);
return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N0);
}
}

// More folding opportunities when target permits.
if (Aggressive) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
Expand Down Expand Up @@ -17013,6 +17036,31 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
}

// fold (fsub (freeze (fmul x, y)), z) -> (fma x, y, (fneg z))
bool CanContract =
(Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
(Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros());
if (CanContract && N0.getOpcode() == ISD::FREEZE) {
SDValue FrozenMul = N0.getOperand(0);
if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
SDValue X = FrozenMul.getOperand(0);
SDValue Y = FrozenMul.getOperand(1);
SDValue NegZ = matcher.getNode(ISD::FNEG, SL, VT, N1);
return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, NegZ);
}
}

// fold (fsub z, (freeze (fmul x, y))) -> (fma (fneg x), y, z)
if (CanContract && N1.getOpcode() == ISD::FREEZE) {
SDValue FrozenMul = N1.getOperand(0);
if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
SDValue X = FrozenMul.getOperand(0);
SDValue Y = FrozenMul.getOperand(1);
SDValue NegX = matcher.getNode(ISD::FNEG, SL, VT, X);
return matcher.getNode(PreferredFusedOpcode, SL, VT, NegX, Y, N0);
}
}

auto isReassociable = [&Options](SDNode *N) {
return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
};
Expand Down
98 changes: 98 additions & 0 deletions llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck %s

; fadd whose LEFT operand is a frozen fmul of %x and %y, added to the constant
; 1.0. Both the fmul and the fadd carry reassoc/nsz/arcp/contract/afn.
; The expected output is a single v_fma_f32 (v0 * v1 + 1.0) with no separate
; multiply or add instruction.
define float @fma_from_freeze_mul_add_left(float %x, float %y) {
; CHECK-LABEL: fma_from_freeze_mul_add_left:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%mul = fmul reassoc nsz arcp contract afn float %x, %y
%mul.fr = freeze float %mul
%add = fadd reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00
ret float %add
}

; Same shape as the left-operand fadd case (frozen fmul + 1.0), but the fmul
; and the fadd additionally carry the nnan flag.
; The expected output is still a single v_fma_f32 (v0 * v1 + 1.0).
define float @fma_from_freeze_mul_add_left_with_nnan(float %x, float %y) {
; CHECK-LABEL: fma_from_freeze_mul_add_left_with_nnan:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
%mul.fr = freeze float %mul
%add = fadd reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00
ret float %add
}

; fadd whose RIGHT operand is a frozen fmul of %x and %y; the left operand is
; the constant 1.0. Both the fmul and the fadd carry
; reassoc/nsz/arcp/contract/afn.
; The expected output is a single v_fma_f32 (v0 * v1 + 1.0), identical to the
; left-operand form.
define float @fma_from_freeze_mul_add_right(float %x, float %y) {
; CHECK-LABEL: fma_from_freeze_mul_add_right:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%mul = fmul reassoc nsz arcp contract afn float %x, %y
%mul.fr = freeze float %mul
%add = fadd reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr
ret float %add
}

; Same shape as the right-operand fadd case (1.0 + frozen fmul), but the fmul
; and the fadd additionally carry the nnan flag.
; The expected output is still a single v_fma_f32 (v0 * v1 + 1.0).
define float @fma_from_freeze_mul_add_right_with_nnan(float %x, float %y) {
; CHECK-LABEL: fma_from_freeze_mul_add_right_with_nnan:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
%mul.fr = freeze float %mul
%add = fadd reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr
ret float %add
}

; fsub whose LEFT operand (minuend) is a frozen fmul of %x and %y; the constant
; 1.0 is subtracted from it. Both the fmul and the fsub carry
; reassoc/nsz/arcp/contract/afn.
; The expected output is a single v_fma_f32 with a negated addend
; (v0 * v1 + -1.0).
define float @fma_from_freeze_mul_sub_left(float %x, float %y) {
; CHECK-LABEL: fma_from_freeze_mul_sub_left:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%mul = fmul reassoc nsz arcp contract afn float %x, %y
%mul.fr = freeze float %mul
%sub = fsub reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00
ret float %sub
}

; Same shape as the left-operand fsub case (frozen fmul - 1.0), but the fmul
; and the fsub additionally carry the nnan flag.
; The expected output is still a single v_fma_f32 (v0 * v1 + -1.0).
define float @fma_from_freeze_mul_sub_left_with_nnan(float %x, float %y) {
; CHECK-LABEL: fma_from_freeze_mul_sub_left_with_nnan:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
%mul.fr = freeze float %mul
%sub = fsub reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00
ret float %sub
}

; fsub whose RIGHT operand (subtrahend) is a frozen fmul of %x and %y; it is
; subtracted from the constant 1.0. Both the fmul and the fsub carry
; reassoc/nsz/arcp/contract/afn.
; The expected output is a single v_fma_f32 with the first multiplicand
; negated (-v0 * v1 + 1.0).
define float @fma_from_freeze_mul_sub_right(float %x, float %y) {
; CHECK-LABEL: fma_from_freeze_mul_sub_right:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_fma_f32 v0, -v0, v1, 1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%mul = fmul reassoc nsz arcp contract afn float %x, %y
%mul.fr = freeze float %mul
%sub = fsub reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr
ret float %sub
}

; Same shape as the right-operand fsub case (1.0 - frozen fmul), but the fmul
; and the fsub additionally carry the nnan flag.
; The expected output is still a single v_fma_f32 (-v0 * v1 + 1.0).
define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) {
; CHECK-LABEL: fma_from_freeze_mul_sub_right_with_nnan:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_fma_f32 v0, -v0, v1, 1.0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%mul = fmul reassoc nnan nsz arcp contract afn float %x, %y
%mul.fr = freeze float %mul
%sub = fsub reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr
ret float %sub
}
Loading