Skip to content

Commit 6c86b7d

Browse files
authored
[ValueTracking][InstCombine] Generalize ignoreSignBitOfZero/NaN to handle more cases (#141015)
This patch was originally part of #139861. It generalizes `ignoreSignBitOfZero/NaN` to handle more instructions and intrinsics. It also mitigates the performance regressions caused by #141010 (IR diff: https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2365/files). We don't need to propagate FMF from the fcmp into the select, since we can infer the demanded properties from the user of the select.
1 parent 6a477f6 commit 6c86b7d

File tree

4 files changed

+459
-52
lines changed

4 files changed

+459
-52
lines changed

llvm/include/llvm/Analysis/ValueTracking.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,14 @@ LLVM_ABI bool isKnownNeverNaN(const Value *V, unsigned Depth,
300300
LLVM_ABI std::optional<bool>
301301
computeKnownFPSignBit(const Value *V, unsigned Depth, const SimplifyQuery &SQ);
302302

303+
/// Return true if the sign bit of the FP value can be ignored by the user when
304+
/// the value is zero.
/// \p U is the use being queried; its user must be an Instruction. The answer
/// is conservative: users that are not explicitly recognized yield false.
305+
bool canIgnoreSignBitOfZero(const Use &U);
306+
307+
/// Return true if the sign bit of the FP value can be ignored by the user when
308+
/// the value is NaN.
/// \p U is the use being queried; its user must be an Instruction. The answer
/// is conservative: users that are not explicitly recognized yield false.
309+
bool canIgnoreSignBitOfNaN(const Use &U);
310+
303311
/// If the specified value can be set by repeating the same byte in memory,
304312
/// return the i8 value that it is represented with. This is true for all i8
305313
/// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5937,6 +5937,114 @@ std::optional<bool> llvm::computeKnownFPSignBit(const Value *V, unsigned Depth,
59375937
return Known.SignBit;
59385938
}
59395939

5940+
bool llvm::canIgnoreSignBitOfZero(const Use &U) {
5941+
auto *User = cast<Instruction>(U.getUser());
5942+
if (auto *FPOp = dyn_cast<FPMathOperator>(User)) {
5943+
if (FPOp->hasNoSignedZeros())
5944+
return true;
5945+
}
5946+
5947+
switch (User->getOpcode()) {
5948+
case Instruction::FPToSI:
5949+
case Instruction::FPToUI:
5950+
return true;
5951+
case Instruction::FCmp:
5952+
// fcmp treats both positive and negative zero as equal.
5953+
return true;
5954+
case Instruction::Call:
5955+
if (auto *II = dyn_cast<IntrinsicInst>(User)) {
5956+
switch (II->getIntrinsicID()) {
5957+
case Intrinsic::fabs:
5958+
return true;
5959+
case Intrinsic::copysign:
5960+
return U.getOperandNo() == 0;
5961+
case Intrinsic::is_fpclass:
5962+
case Intrinsic::vp_is_fpclass: {
5963+
auto Test =
5964+
static_cast<FPClassTest>(
5965+
cast<ConstantInt>(II->getArgOperand(1))->getZExtValue()) &
5966+
FPClassTest::fcZero;
5967+
return Test == FPClassTest::fcZero || Test == FPClassTest::fcNone;
5968+
}
5969+
default:
5970+
return false;
5971+
}
5972+
}
5973+
return false;
5974+
default:
5975+
return false;
5976+
}
5977+
}
5978+
5979+
bool llvm::canIgnoreSignBitOfNaN(const Use &U) {
5980+
auto *User = cast<Instruction>(U.getUser());
5981+
if (auto *FPOp = dyn_cast<FPMathOperator>(User)) {
5982+
if (FPOp->hasNoNaNs())
5983+
return true;
5984+
}
5985+
5986+
switch (User->getOpcode()) {
5987+
case Instruction::FPToSI:
5988+
case Instruction::FPToUI:
5989+
return true;
5990+
// Proper FP math operations ignore the sign bit of NaN.
5991+
case Instruction::FAdd:
5992+
case Instruction::FSub:
5993+
case Instruction::FMul:
5994+
case Instruction::FDiv:
5995+
case Instruction::FRem:
5996+
case Instruction::FPTrunc:
5997+
case Instruction::FPExt:
5998+
case Instruction::FCmp:
5999+
return true;
6000+
// Bitwise FP operations should preserve the sign bit of NaN.
6001+
case Instruction::FNeg:
6002+
case Instruction::Select:
6003+
case Instruction::PHI:
6004+
return false;
6005+
case Instruction::Ret:
6006+
return User->getFunction()->getAttributes().getRetNoFPClass() &
6007+
FPClassTest::fcNan;
6008+
case Instruction::Call:
6009+
case Instruction::Invoke: {
6010+
if (auto *II = dyn_cast<IntrinsicInst>(User)) {
6011+
switch (II->getIntrinsicID()) {
6012+
case Intrinsic::fabs:
6013+
return true;
6014+
case Intrinsic::copysign:
6015+
return U.getOperandNo() == 0;
6016+
// Other proper FP math intrinsics ignore the sign bit of NaN.
6017+
case Intrinsic::maxnum:
6018+
case Intrinsic::minnum:
6019+
case Intrinsic::maximum:
6020+
case Intrinsic::minimum:
6021+
case Intrinsic::maximumnum:
6022+
case Intrinsic::minimumnum:
6023+
case Intrinsic::canonicalize:
6024+
case Intrinsic::fma:
6025+
case Intrinsic::fmuladd:
6026+
case Intrinsic::sqrt:
6027+
case Intrinsic::pow:
6028+
case Intrinsic::powi:
6029+
case Intrinsic::fptoui_sat:
6030+
case Intrinsic::fptosi_sat:
6031+
case Intrinsic::is_fpclass:
6032+
case Intrinsic::vp_is_fpclass:
6033+
return true;
6034+
default:
6035+
return false;
6036+
}
6037+
}
6038+
6039+
FPClassTest NoFPClass =
6040+
cast<CallBase>(User)->getParamNoFPClass(U.getOperandNo());
6041+
return NoFPClass & FPClassTest::fcNan;
6042+
}
6043+
default:
6044+
return false;
6045+
}
6046+
}
6047+
59406048
Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
59416049

59426050
// All byte-wide stores are splatable, even of arbitrary variables.

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

Lines changed: 7 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2773,47 +2773,6 @@ Instruction *InstCombinerImpl::foldAndOrOfSelectUsingImpliedCond(Value *Op,
27732773
return nullptr;
27742774
}
27752775

2776-
/// Return true if the sign bit of result can be ignored when the result is
2777-
/// zero.
2778-
static bool ignoreSignBitOfZero(Instruction &I) {
2779-
if (I.hasNoSignedZeros())
2780-
return true;
2781-
2782-
// Check if the sign bit is ignored by the only user.
2783-
if (!I.hasOneUse())
2784-
return false;
2785-
Instruction *User = I.user_back();
2786-
2787-
// fcmp treats both positive and negative zero as equal.
2788-
if (User->getOpcode() == Instruction::FCmp)
2789-
return true;
2790-
2791-
if (auto *FPOp = dyn_cast<FPMathOperator>(User))
2792-
return FPOp->hasNoSignedZeros();
2793-
2794-
return false;
2795-
}
2796-
2797-
/// Return true if the sign bit of result can be ignored when the result is NaN.
2798-
static bool ignoreSignBitOfNaN(Instruction &I) {
2799-
if (I.hasNoNaNs())
2800-
return true;
2801-
2802-
// Check if the sign bit is ignored by the only user.
2803-
if (!I.hasOneUse())
2804-
return false;
2805-
Instruction *User = I.user_back();
2806-
2807-
// fcmp ignores the sign bit of NaN.
2808-
if (User->getOpcode() == Instruction::FCmp)
2809-
return true;
2810-
2811-
if (auto *FPOp = dyn_cast<FPMathOperator>(User))
2812-
return FPOp->hasNoNaNs();
2813-
2814-
return false;
2815-
}
2816-
28172776
// Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
28182777
// fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work.
28192778
static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
@@ -2838,7 +2797,8 @@ static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
28382797
// of NAN, but IEEE-754 specifies the signbit of NAN values with
28392798
// fneg/fabs operations.
28402799
if (match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(X))) &&
2841-
(cast<FPMathOperator>(CondVal)->hasNoNaNs() || ignoreSignBitOfNaN(SI) ||
2800+
(cast<FPMathOperator>(CondVal)->hasNoNaNs() || SI.hasNoNaNs() ||
2801+
(SI.hasOneUse() && canIgnoreSignBitOfNaN(*SI.use_begin())) ||
28422802
isKnownNeverNaN(X, /*Depth=*/0,
28432803
IC.getSimplifyQuery().getWithInstruction(
28442804
cast<Instruction>(CondVal))))) {
@@ -2885,7 +2845,11 @@ static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
28852845
// Note: We require "nnan" for this fold because fcmp ignores the signbit
28862846
// of NAN, but IEEE-754 specifies the signbit of NAN values with
28872847
// fneg/fabs operations.
2888-
if (!ignoreSignBitOfZero(SI) || !ignoreSignBitOfNaN(SI))
2848+
if (!SI.hasNoSignedZeros() &&
2849+
(!SI.hasOneUse() || !canIgnoreSignBitOfZero(*SI.use_begin())))
2850+
return nullptr;
2851+
if (!SI.hasNoNaNs() &&
2852+
(!SI.hasOneUse() || !canIgnoreSignBitOfNaN(*SI.use_begin())))
28892853
return nullptr;
28902854

28912855
if (Swap)

0 commit comments

Comments
 (0)