Skip to content

Commit bc331d6

Browse files
arsenm authored and svkeerthy committed
AMDGPU: Handle other fmin flavors in fract combine (#141987)
Since the input is either known not to be a NaN, or the code using the result explicitly checks whether the input is a NaN, any of the three fmin variants is valid to match.
1 parent 47e9093 commit bc331d6

File tree

2 files changed

+230
-278
lines changed

2 files changed

+230
-278
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ class AMDGPUCodeGenPrepareImpl
327327

328328
bool visitIntrinsicInst(IntrinsicInst &I);
329329
bool visitBitreverseIntrinsicInst(IntrinsicInst &I);
330-
bool visitMinNum(IntrinsicInst &I);
330+
bool visitFMinLike(IntrinsicInst &I);
331331
bool visitSqrt(IntrinsicInst &I);
332332
bool run();
333333
};
@@ -2200,7 +2200,9 @@ bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
22002200
case Intrinsic::bitreverse:
22012201
return visitBitreverseIntrinsicInst(I);
22022202
case Intrinsic::minnum:
2203-
return visitMinNum(I);
2203+
case Intrinsic::minimumnum:
2204+
case Intrinsic::minimum:
2205+
return visitFMinLike(I);
22042206
case Intrinsic::sqrt:
22052207
return visitSqrt(I);
22062208
default:
@@ -2219,15 +2221,22 @@ bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
22192221
}
22202222

22212223
/// Match non-nan fract pattern.
2222-
/// minnum(fsub(x, floor(x)), nextafter(1.0, -1.0)
2224+
/// minnum(fsub(x, floor(x)), nextafter(1.0, -1.0))
2225+
/// minimumnum(fsub(x, floor(x)), nextafter(1.0, -1.0))
2226+
/// minimum(fsub(x, floor(x)), nextafter(1.0, -1.0))
22232227
///
22242228
/// If fract is a useful instruction for the subtarget. Does not account for the
22252229
/// nan handling; the instruction has a nan check on the input value.
22262230
Value *AMDGPUCodeGenPrepareImpl::matchFractPat(IntrinsicInst &I) {
22272231
if (ST.hasFractBug())
22282232
return nullptr;
22292233

2230-
if (I.getIntrinsicID() != Intrinsic::minnum)
2234+
Intrinsic::ID IID = I.getIntrinsicID();
2235+
2236+
// The value is only used in contexts where we know the input isn't a nan, so
2237+
// any of the fmin variants are fine.
2238+
if (IID != Intrinsic::minnum &&
2239+
IID != Intrinsic::minimumnum && IID != Intrinsic::minimum)
22312240
return nullptr;
22322241

22332242
Type *Ty = I.getType();
@@ -2273,7 +2282,7 @@ Value *AMDGPUCodeGenPrepareImpl::applyFractPat(IRBuilder<> &Builder,
22732282
return insertValues(Builder, FractArg->getType(), ResultVals);
22742283
}
22752284

2276-
bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) {
2285+
bool AMDGPUCodeGenPrepareImpl::visitFMinLike(IntrinsicInst &I) {
22772286
Value *FractArg = matchFractPat(I);
22782287
if (!FractArg)
22792288
return false;

0 commit comments

Comments
 (0)