
Commit 9ac00b8

[NVPTX] deprecate nvvm.rotate.* intrinsics, cleanup funnel-shift handling (#107655)
This change deprecates the following intrinsics, which can be trivially converted to LLVM funnel-shift intrinsics (a sketch of the mapping follows below):

- @llvm.nvvm.rotate.b32
- @llvm.nvvm.rotate.right.b64
- @llvm.nvvm.rotate.b64
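For reference, a minimal LLVM IR sketch of the replacement (value and function names here are illustrative, not taken from the patch): a rotate-left is a funnel shift that feeds the same value into both data operands.

; sketch only: @llvm.nvvm.rotate.b32 rewritten as a generic funnel shift
define i32 @rotl32(i32 %x, i32 %amt) {
  ; was: %r = call i32 @llvm.nvvm.rotate.b32(i32 %x, i32 %amt)
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %amt)
  ret i32 %r
}

declare i32 @llvm.fshl.i32(i32, i32, i32)

The same pattern with @llvm.fshr gives the right rotate; the 64-bit variants additionally need the i32 shift amount widened to i64, which the auto-upgrade code below handles.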
1 parent 1fae131 commit 9ac00b8

File tree

9 files changed: +463 -574 lines changed


llvm/docs/ReleaseNotes.rst

Lines changed: 6 additions & 0 deletions

@@ -63,6 +63,12 @@ Changes to the LLVM IR
 * ``llvm.nvvm.bitcast.d2ll``
 * ``llvm.nvvm.bitcast.ll2d``
 
+* Remove the following intrinsics which can be replaced with a funnel-shift:
+
+  * ``llvm.nvvm.rotate.b32``
+  * ``llvm.nvvm.rotate.right.b64``
+  * ``llvm.nvvm.rotate.b64``
+
 Changes to LLVM infrastructure
 ------------------------------

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 0 additions & 16 deletions

@@ -4479,22 +4479,6 @@ def int_nvvm_sust_p_3d_v4i32_trap
               "llvm.nvvm.sust.p.3d.v4i32.trap">,
     ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">;
 
-
-def int_nvvm_rotate_b32
-  : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
-      [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b32">,
-    ClangBuiltin<"__nvvm_rotate_b32">;
-
-def int_nvvm_rotate_b64
-  : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
-      [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b64">,
-    ClangBuiltin<"__nvvm_rotate_b64">;
-
-def int_nvvm_rotate_right_b64
-  : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
-      [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.right.b64">,
-    ClangBuiltin<"__nvvm_rotate_right_b64">;
-
 def int_nvvm_swap_lo_hi_b64
   : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
       [IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">,

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 106 additions & 78 deletions

@@ -1272,6 +1272,9 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
       // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
       Expand =
           Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
+    else if (Name.consume_front("rotate."))
+      // nvvm.rotate.{b32,b64,right.b64}
+      Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
     else
       Expand = false;
@@ -2258,6 +2261,108 @@ void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
   }
 }
 
+static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
+                                       Function *F, IRBuilder<> &Builder) {
+  Value *Rep = nullptr;
+
+  if (Name == "abs.i" || Name == "abs.ll") {
+    Value *Arg = CI->getArgOperand(0);
+    Value *Neg = Builder.CreateNeg(Arg, "neg");
+    Value *Cmp = Builder.CreateICmpSGE(
+        Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
+    Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
+  } else if (Name.starts_with("atomic.load.add.f32.p") ||
+             Name.starts_with("atomic.load.add.f64.p")) {
+    Value *Ptr = CI->getArgOperand(0);
+    Value *Val = CI->getArgOperand(1);
+    Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
+                                  AtomicOrdering::SequentiallyConsistent);
+  } else if (Name.consume_front("max.") &&
+             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
+              Name == "ui" || Name == "ull")) {
+    Value *Arg0 = CI->getArgOperand(0);
+    Value *Arg1 = CI->getArgOperand(1);
+    Value *Cmp = Name.starts_with("u")
+                     ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
+                     : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
+    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
+  } else if (Name.consume_front("min.") &&
+             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
+              Name == "ui" || Name == "ull")) {
+    Value *Arg0 = CI->getArgOperand(0);
+    Value *Arg1 = CI->getArgOperand(1);
+    Value *Cmp = Name.starts_with("u")
+                     ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
+                     : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
+    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
+  } else if (Name == "clz.ll") {
+    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
+    Value *Arg = CI->getArgOperand(0);
+    Value *Ctlz = Builder.CreateCall(
+        Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+                                  {Arg->getType()}),
+        {Arg, Builder.getFalse()}, "ctlz");
+    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
+  } else if (Name == "popc.ll") {
+    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
+    // i64.
+    Value *Arg = CI->getArgOperand(0);
+    Value *Popc = Builder.CreateCall(
+        Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
+                                  {Arg->getType()}),
+        Arg, "ctpop");
+    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
+  } else if (Name == "h2f") {
+    Rep = Builder.CreateCall(
+        Intrinsic::getDeclaration(F->getParent(), Intrinsic::convert_from_fp16,
+                                  {Builder.getFloatTy()}),
+        CI->getArgOperand(0), "h2f");
+  } else if (Name.consume_front("bitcast.") &&
+             (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
+              Name == "d2ll")) {
+    Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
+  } else if (Name == "rotate.b32") {
+    Value *Arg = CI->getOperand(0);
+    Value *ShiftAmt = CI->getOperand(1);
+    Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
+                                  {Arg, Arg, ShiftAmt});
+  } else if (Name == "rotate.b64") {
+    Type *Int64Ty = Builder.getInt64Ty();
+    Value *Arg = CI->getOperand(0);
+    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
+    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
+                                  {Arg, Arg, ZExtShiftAmt});
+  } else if (Name == "rotate.right.b64") {
+    Type *Int64Ty = Builder.getInt64Ty();
+    Value *Arg = CI->getOperand(0);
+    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
+    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
+                                  {Arg, Arg, ZExtShiftAmt});
+  } else {
+    Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
+    if (IID != Intrinsic::not_intrinsic &&
+        !F->getReturnType()->getScalarType()->isBFloatTy()) {
+      rename(F);
+      Function *NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
+      SmallVector<Value *, 2> Args;
+      for (size_t I = 0; I < NewFn->arg_size(); ++I) {
+        Value *Arg = CI->getArgOperand(I);
+        Type *OldType = Arg->getType();
+        Type *NewType = NewFn->getArg(I)->getType();
+        Args.push_back(
+            (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
+                ? Builder.CreateBitCast(Arg, NewType)
+                : Arg);
+      }
+      Rep = Builder.CreateCall(NewFn, Args);
+      if (F->getReturnType()->isIntegerTy())
+        Rep = Builder.CreateBitCast(Rep, F->getReturnType());
+    }
+  }
+
+  return Rep;
+}
+
 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
                                       IRBuilder<> &Builder) {
   LLVMContext &C = F->getContext();
@@ -4208,85 +4313,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
 
   if (!IsX86 && Name == "stackprotectorcheck") {
     Rep = nullptr;
-  } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
-    Value *Arg = CI->getArgOperand(0);
-    Value *Neg = Builder.CreateNeg(Arg, "neg");
-    Value *Cmp = Builder.CreateICmpSGE(
-        Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
-    Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
-  } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
-                        Name.starts_with("atomic.load.add.f64.p"))) {
-    Value *Ptr = CI->getArgOperand(0);
-    Value *Val = CI->getArgOperand(1);
-    Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
-                                  AtomicOrdering::SequentiallyConsistent);
-  } else if (IsNVVM && Name.consume_front("max.") &&
-             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
-              Name == "ui" || Name == "ull")) {
-    Value *Arg0 = CI->getArgOperand(0);
-    Value *Arg1 = CI->getArgOperand(1);
-    Value *Cmp = Name.starts_with("u")
-                     ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
-                     : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
-    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
-  } else if (IsNVVM && Name.consume_front("min.") &&
-             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
-              Name == "ui" || Name == "ull")) {
-    Value *Arg0 = CI->getArgOperand(0);
-    Value *Arg1 = CI->getArgOperand(1);
-    Value *Cmp = Name.starts_with("u")
-                     ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
-                     : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
-    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
-  } else if (IsNVVM && Name == "clz.ll") {
-    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
-    Value *Arg = CI->getArgOperand(0);
-    Value *Ctlz = Builder.CreateCall(
-        Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
-                                  {Arg->getType()}),
-        {Arg, Builder.getFalse()}, "ctlz");
-    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
-  } else if (IsNVVM && Name == "popc.ll") {
-    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
-    // i64.
-    Value *Arg = CI->getArgOperand(0);
-    Value *Popc = Builder.CreateCall(
-        Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
-                                  {Arg->getType()}),
-        Arg, "ctpop");
-    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
   } else if (IsNVVM) {
-    if (Name == "h2f") {
-      Rep =
-          Builder.CreateCall(Intrinsic::getDeclaration(
-                                 F->getParent(), Intrinsic::convert_from_fp16,
-                                 {Builder.getFloatTy()}),
-                             CI->getArgOperand(0), "h2f");
-    } else if (Name.consume_front("bitcast.") &&
-               (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
-                Name == "d2ll")) {
-      Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
-    } else {
-      Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
-      if (IID != Intrinsic::not_intrinsic &&
-          !F->getReturnType()->getScalarType()->isBFloatTy()) {
-        rename(F);
-        NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
-        SmallVector<Value *, 2> Args;
-        for (size_t I = 0; I < NewFn->arg_size(); ++I) {
-          Value *Arg = CI->getArgOperand(I);
-          Type *OldType = Arg->getType();
-          Type *NewType = NewFn->getArg(I)->getType();
-          Args.push_back((OldType->isIntegerTy() &&
-                          NewType->getScalarType()->isBFloatTy())
-                             ? Builder.CreateBitCast(Arg, NewType)
-                             : Arg);
-        }
-        Rep = Builder.CreateCall(NewFn, Args);
-        if (F->getReturnType()->isIntegerTy())
-          Rep = Builder.CreateBitCast(Rep, F->getReturnType());
-      }
-    }
+    Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
   } else if (IsX86) {
     Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
   } else if (IsARM) {
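One detail worth noting in the new rotate.b64/rotate.right.b64 arms above: the NVVM 64-bit rotates take an i32 shift amount, while @llvm.fshl.i64 and @llvm.fshr.i64 require all three operands to be i64, hence the CreateZExt before CreateIntrinsic. A hedged sketch of the IR the upgrade produces for the right-rotate case (value names are illustrative):

; before the upgrade: %r = call i64 @llvm.nvvm.rotate.right.b64(i64 %x, i32 %amt)
define i64 @rotr64(i64 %x, i32 %amt) {
  %amt.zext = zext i32 %amt to i64        ; widen the shift amount to i64
  %r = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %amt.zext)
  ret i64 %r
}

declare i64 @llvm.fshr.i64(i64, i64, i64)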

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 7 additions & 14 deletions

@@ -594,20 +594,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
 
-  // TODO: we may consider expanding ROTL/ROTR on older GPUs. Currently on GPUs
-  // that don't have h/w rotation we lower them to multi-instruction assembly.
-  // See ROT*_sw in NVPTXIntrInfo.td
-  setOperationAction(ISD::ROTL, MVT::i64, Legal);
-  setOperationAction(ISD::ROTR, MVT::i64, Legal);
-  setOperationAction(ISD::ROTL, MVT::i32, Legal);
-  setOperationAction(ISD::ROTR, MVT::i32, Legal);
-
-  setOperationAction(ISD::ROTL, MVT::i16, Expand);
-  setOperationAction(ISD::ROTL, MVT::v2i16, Expand);
-  setOperationAction(ISD::ROTR, MVT::i16, Expand);
-  setOperationAction(ISD::ROTR, MVT::v2i16, Expand);
-  setOperationAction(ISD::ROTL, MVT::i8, Expand);
-  setOperationAction(ISD::ROTR, MVT::i8, Expand);
+  setOperationAction({ISD::ROTL, ISD::ROTR},
+                     {MVT::i8, MVT::i16, MVT::v2i16, MVT::i32, MVT::i64},
+                     Expand);
+
+  if (STI.hasHWROT32())
+    setOperationAction({ISD::FSHL, ISD::FSHR}, MVT::i32, Legal);
+
   setOperationAction(ISD::BSWAP, MVT::i16, Expand);
 
   setOperationAction(ISD::BR_JT, MVT::Other, Custom);
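Reading the new hunk: ROTL/ROTR are now expanded for every integer type listed, and in this hunk the i32 funnel-shift nodes are the only ones marked Legal, gated on STI.hasHWROT32(). A minimal IR sketch of the two shapes this distinguishes (function names are made up, and how each case ultimately lowers depends on the subtarget, so treat it as illustrative only):

; i32 funnel shift: Legal when the subtarget reports 32-bit hardware
; funnel-shift/rotate support, so it can be selected directly.
define i32 @funnel32(i32 %hi, i32 %lo, i32 %amt) {
  %r = call i32 @llvm.fshl.i32(i32 %hi, i32 %lo, i32 %amt)
  ret i32 %r
}

; i16 rotate: ROTL/ROTR are marked Expand, so the generic legalizer
; breaks this up instead of relying on a target rotate pattern.
define i16 @rotl16(i16 %x, i16 %amt) {
  %r = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %amt)
  ret i16 %r
}

declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i16 @llvm.fshl.i16(i16, i16, i16)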

0 commit comments
