Skip to content

Commit 4cb61c2

Browse files
author
Dmitry Chernenkov
committed
Revert "[NVPTX] deprecate nvvm.rotate.* intrinsics, cleanup funnel-shift handling (#107655)"
This reverts commit 9ac00b8.
1 parent 9a0e281 commit 4cb61c2

File tree

9 files changed

+574
-463
lines changed

9 files changed

+574
-463
lines changed

llvm/docs/ReleaseNotes.rst

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,6 @@ Changes to the LLVM IR
6363
* ``llvm.nvvm.bitcast.d2ll``
6464
* ``llvm.nvvm.bitcast.ll2d``
6565

66-
* Remove the following intrinsics which can be replaced with a funnel-shift:
67-
68-
* ``llvm.nvvm.rotate.b32``
69-
* ``llvm.nvvm.rotate.right.b64``
70-
* ``llvm.nvvm.rotate.b64``
71-
7266
Changes to LLVM infrastructure
7367
------------------------------
7468

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4479,6 +4479,22 @@ def int_nvvm_sust_p_3d_v4i32_trap
44794479
"llvm.nvvm.sust.p.3d.v4i32.trap">,
44804480
ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">;
44814481

4482+
4483+
def int_nvvm_rotate_b32
4484+
: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4485+
[IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b32">,
4486+
ClangBuiltin<"__nvvm_rotate_b32">;
4487+
4488+
def int_nvvm_rotate_b64
4489+
: DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
4490+
[IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b64">,
4491+
ClangBuiltin<"__nvvm_rotate_b64">;
4492+
4493+
def int_nvvm_rotate_right_b64
4494+
: DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
4495+
[IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.right.b64">,
4496+
ClangBuiltin<"__nvvm_rotate_right_b64">;
4497+
44824498
def int_nvvm_swap_lo_hi_b64
44834499
: DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
44844500
[IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">,

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 78 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -1272,9 +1272,6 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
12721272
// nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
12731273
Expand =
12741274
Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1275-
else if (Name.consume_front("rotate."))
1276-
// nvvm.rotate.{b32,b64,right.b64}
1277-
Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
12781275
else
12791276
Expand = false;
12801277

@@ -2261,108 +2258,6 @@ void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
22612258
}
22622259
}
22632260

2264-
static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2265-
Function *F, IRBuilder<> &Builder) {
2266-
Value *Rep = nullptr;
2267-
2268-
if (Name == "abs.i" || Name == "abs.ll") {
2269-
Value *Arg = CI->getArgOperand(0);
2270-
Value *Neg = Builder.CreateNeg(Arg, "neg");
2271-
Value *Cmp = Builder.CreateICmpSGE(
2272-
Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2273-
Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2274-
} else if (Name.starts_with("atomic.load.add.f32.p") ||
2275-
Name.starts_with("atomic.load.add.f64.p")) {
2276-
Value *Ptr = CI->getArgOperand(0);
2277-
Value *Val = CI->getArgOperand(1);
2278-
Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2279-
AtomicOrdering::SequentiallyConsistent);
2280-
} else if (Name.consume_front("max.") &&
2281-
(Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2282-
Name == "ui" || Name == "ull")) {
2283-
Value *Arg0 = CI->getArgOperand(0);
2284-
Value *Arg1 = CI->getArgOperand(1);
2285-
Value *Cmp = Name.starts_with("u")
2286-
? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2287-
: Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2288-
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2289-
} else if (Name.consume_front("min.") &&
2290-
(Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2291-
Name == "ui" || Name == "ull")) {
2292-
Value *Arg0 = CI->getArgOperand(0);
2293-
Value *Arg1 = CI->getArgOperand(1);
2294-
Value *Cmp = Name.starts_with("u")
2295-
? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2296-
: Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2297-
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2298-
} else if (Name == "clz.ll") {
2299-
// llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2300-
Value *Arg = CI->getArgOperand(0);
2301-
Value *Ctlz = Builder.CreateCall(
2302-
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
2303-
{Arg->getType()}),
2304-
{Arg, Builder.getFalse()}, "ctlz");
2305-
Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2306-
} else if (Name == "popc.ll") {
2307-
// llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2308-
// i64.
2309-
Value *Arg = CI->getArgOperand(0);
2310-
Value *Popc = Builder.CreateCall(
2311-
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
2312-
{Arg->getType()}),
2313-
Arg, "ctpop");
2314-
Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2315-
} else if (Name == "h2f") {
2316-
Rep = Builder.CreateCall(
2317-
Intrinsic::getDeclaration(F->getParent(), Intrinsic::convert_from_fp16,
2318-
{Builder.getFloatTy()}),
2319-
CI->getArgOperand(0), "h2f");
2320-
} else if (Name.consume_front("bitcast.") &&
2321-
(Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2322-
Name == "d2ll")) {
2323-
Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2324-
} else if (Name == "rotate.b32") {
2325-
Value *Arg = CI->getOperand(0);
2326-
Value *ShiftAmt = CI->getOperand(1);
2327-
Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2328-
{Arg, Arg, ShiftAmt});
2329-
} else if (Name == "rotate.b64") {
2330-
Type *Int64Ty = Builder.getInt64Ty();
2331-
Value *Arg = CI->getOperand(0);
2332-
Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2333-
Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2334-
{Arg, Arg, ZExtShiftAmt});
2335-
} else if (Name == "rotate.right.b64") {
2336-
Type *Int64Ty = Builder.getInt64Ty();
2337-
Value *Arg = CI->getOperand(0);
2338-
Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2339-
Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2340-
{Arg, Arg, ZExtShiftAmt});
2341-
} else {
2342-
Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2343-
if (IID != Intrinsic::not_intrinsic &&
2344-
!F->getReturnType()->getScalarType()->isBFloatTy()) {
2345-
rename(F);
2346-
Function *NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
2347-
SmallVector<Value *, 2> Args;
2348-
for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2349-
Value *Arg = CI->getArgOperand(I);
2350-
Type *OldType = Arg->getType();
2351-
Type *NewType = NewFn->getArg(I)->getType();
2352-
Args.push_back(
2353-
(OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2354-
? Builder.CreateBitCast(Arg, NewType)
2355-
: Arg);
2356-
}
2357-
Rep = Builder.CreateCall(NewFn, Args);
2358-
if (F->getReturnType()->isIntegerTy())
2359-
Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2360-
}
2361-
}
2362-
2363-
return Rep;
2364-
}
2365-
23662261
static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
23672262
IRBuilder<> &Builder) {
23682263
LLVMContext &C = F->getContext();
@@ -4313,8 +4208,85 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
43134208

43144209
if (!IsX86 && Name == "stackprotectorcheck") {
43154210
Rep = nullptr;
4211+
} else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
4212+
Value *Arg = CI->getArgOperand(0);
4213+
Value *Neg = Builder.CreateNeg(Arg, "neg");
4214+
Value *Cmp = Builder.CreateICmpSGE(
4215+
Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
4216+
Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
4217+
} else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
4218+
Name.starts_with("atomic.load.add.f64.p"))) {
4219+
Value *Ptr = CI->getArgOperand(0);
4220+
Value *Val = CI->getArgOperand(1);
4221+
Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
4222+
AtomicOrdering::SequentiallyConsistent);
4223+
} else if (IsNVVM && Name.consume_front("max.") &&
4224+
(Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4225+
Name == "ui" || Name == "ull")) {
4226+
Value *Arg0 = CI->getArgOperand(0);
4227+
Value *Arg1 = CI->getArgOperand(1);
4228+
Value *Cmp = Name.starts_with("u")
4229+
? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
4230+
: Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
4231+
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
4232+
} else if (IsNVVM && Name.consume_front("min.") &&
4233+
(Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4234+
Name == "ui" || Name == "ull")) {
4235+
Value *Arg0 = CI->getArgOperand(0);
4236+
Value *Arg1 = CI->getArgOperand(1);
4237+
Value *Cmp = Name.starts_with("u")
4238+
? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
4239+
: Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
4240+
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
4241+
} else if (IsNVVM && Name == "clz.ll") {
4242+
// llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
4243+
Value *Arg = CI->getArgOperand(0);
4244+
Value *Ctlz = Builder.CreateCall(
4245+
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
4246+
{Arg->getType()}),
4247+
{Arg, Builder.getFalse()}, "ctlz");
4248+
Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
4249+
} else if (IsNVVM && Name == "popc.ll") {
4250+
// llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
4251+
// i64.
4252+
Value *Arg = CI->getArgOperand(0);
4253+
Value *Popc = Builder.CreateCall(
4254+
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
4255+
{Arg->getType()}),
4256+
Arg, "ctpop");
4257+
Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
43164258
} else if (IsNVVM) {
4317-
Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4259+
if (Name == "h2f") {
4260+
Rep =
4261+
Builder.CreateCall(Intrinsic::getDeclaration(
4262+
F->getParent(), Intrinsic::convert_from_fp16,
4263+
{Builder.getFloatTy()}),
4264+
CI->getArgOperand(0), "h2f");
4265+
} else if (Name.consume_front("bitcast.") &&
4266+
(Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
4267+
Name == "d2ll")) {
4268+
Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
4269+
} else {
4270+
Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
4271+
if (IID != Intrinsic::not_intrinsic &&
4272+
!F->getReturnType()->getScalarType()->isBFloatTy()) {
4273+
rename(F);
4274+
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
4275+
SmallVector<Value *, 2> Args;
4276+
for (size_t I = 0; I < NewFn->arg_size(); ++I) {
4277+
Value *Arg = CI->getArgOperand(I);
4278+
Type *OldType = Arg->getType();
4279+
Type *NewType = NewFn->getArg(I)->getType();
4280+
Args.push_back((OldType->isIntegerTy() &&
4281+
NewType->getScalarType()->isBFloatTy())
4282+
? Builder.CreateBitCast(Arg, NewType)
4283+
: Arg);
4284+
}
4285+
Rep = Builder.CreateCall(NewFn, Args);
4286+
if (F->getReturnType()->isIntegerTy())
4287+
Rep = Builder.CreateBitCast(Rep, F->getReturnType());
4288+
}
4289+
}
43184290
} else if (IsX86) {
43194291
Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
43204292
} else if (IsARM) {

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -594,13 +594,20 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
594594
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
595595
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
596596

597-
setOperationAction({ISD::ROTL, ISD::ROTR},
598-
{MVT::i8, MVT::i16, MVT::v2i16, MVT::i32, MVT::i64},
599-
Expand);
600-
601-
if (STI.hasHWROT32())
602-
setOperationAction({ISD::FSHL, ISD::FSHR}, MVT::i32, Legal);
603-
597+
// TODO: we may consider expanding ROTL/ROTR on older GPUs. Currently on GPUs
598+
// that don't have h/w rotation we lower them to multi-instruction assembly.
599+
// See ROT*_sw in NVPTXInstrInfo.td
600+
setOperationAction(ISD::ROTL, MVT::i64, Legal);
601+
setOperationAction(ISD::ROTR, MVT::i64, Legal);
602+
setOperationAction(ISD::ROTL, MVT::i32, Legal);
603+
setOperationAction(ISD::ROTR, MVT::i32, Legal);
604+
605+
setOperationAction(ISD::ROTL, MVT::i16, Expand);
606+
setOperationAction(ISD::ROTL, MVT::v2i16, Expand);
607+
setOperationAction(ISD::ROTR, MVT::i16, Expand);
608+
setOperationAction(ISD::ROTR, MVT::v2i16, Expand);
609+
setOperationAction(ISD::ROTL, MVT::i8, Expand);
610+
setOperationAction(ISD::ROTR, MVT::i8, Expand);
604611
setOperationAction(ISD::BSWAP, MVT::i16, Expand);
605612

606613
setOperationAction(ISD::BR_JT, MVT::Other, Custom);

0 commit comments

Comments
 (0)