-
Notifications
You must be signed in to change notification settings - Fork 13.7k
AMDGPU: Add codegen for atomicrmw operations usub_cond and usub_sat #141068
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1089,7 +1089,31 @@ multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst, | |
} | ||
} | ||
|
||
multiclass DSAtomicRetNoRetPatCondSub_mc<DS_Pseudo inst, DS_Pseudo noRetInst, | ||
ValueType vt, string frag> { | ||
let OtherPredicates = [LDSRequiresM0Init] in { | ||
def : DSAtomicRetPat<inst, vt, | ||
!cast<PatFrag>(frag#"_local_m0_"#vt)>; | ||
def : DSAtomicRetPat<noRetInst, vt, | ||
!cast<PatFrag>(frag#"_local_m0_noret_"#vt), /* complexity */ 1>; | ||
} | ||
|
||
Comment on lines
+1094
to
+1100
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this dead? Weren't these only introduced after gfx9? |
||
let OtherPredicates = [NotLDSRequiresM0Init] in { | ||
def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, | ||
!cast<PatFrag>(frag#"_local_"#vt)>; | ||
def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt, | ||
!cast<PatFrag>(frag#"_local_noret_"#vt), /* complexity */ 1>; | ||
} | ||
|
||
let OtherPredicates = [HasGDS] in { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. GDS case isn't tested, but GDS atomic support is mostly missing for every other operator |
||
def : DSAtomicRetPat<inst, vt, | ||
!cast<PatFrag>(frag#"_region_m0_"#vt), | ||
/* complexity */ 0, /* gds */ 1>; | ||
def : DSAtomicRetPat<noRetInst, vt, | ||
!cast<PatFrag>(frag#"_region_m0_noret_"#vt), | ||
/* complexity */ 1, /* gds */ 1>; | ||
} | ||
} | ||
|
||
let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in { | ||
// Caution, the order of src and cmp is the *opposite* of the BUFFER_ATOMIC_CMPSWAP opcode. | ||
|
@@ -1172,6 +1196,14 @@ defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_F16, DS_PK_ADD_F16, v2f16, "atomic_l | |
defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_BF16, DS_PK_ADD_BF16, v2bf16, "atomic_load_fadd">; | ||
} | ||
|
||
let SubtargetPredicate = isGFX12Plus in { | ||
|
||
defm : DSAtomicRetNoRetPatCondSub_mc<DS_COND_SUB_RTN_U32, DS_COND_SUB_U32, i32, "atomic_load_usub_cond">; | ||
|
||
defm : DSAtomicRetNoRetPat_mc<DS_SUB_CLAMP_RTN_U32, DS_SUB_CLAMP_U32, i32, "atomic_load_usub_sat">; | ||
|
||
} // let SubtargetPredicate = isGFX12Plus | ||
|
||
let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in { | ||
defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B32, DS_CMPST_B32, i32, "atomic_cmp_swap">; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1642,13 +1642,17 @@ defm : FlatAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_"#as, f64>; | |
defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_"#as, f64>; | ||
} | ||
|
||
let SubtargetPredicate = isGFX12Plus in { | ||
defm : FlatAtomicRtnPat<"FLAT_ATOMIC_COND_SUB_U32", "atomic_load_usub_cond_" #as, i32 >; | ||
|
||
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Another wrong use of "CSub" predicate. |
||
defm : FlatAtomicNoRtnPat<"FLAT_ATOMIC_COND_SUB_U32", "atomic_load_usub_cond_"#as, i32>; | ||
} | ||
} // end foreach as | ||
|
||
let SubtargetPredicate = isGFX12Plus in { | ||
defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32 >; | ||
|
||
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in | ||
defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>; | ||
defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>; | ||
} | ||
|
||
let OtherPredicates = [HasD16LoadStore] in { | ||
|
@@ -1788,10 +1792,10 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR", "atomic_load_or_global", i32>; | |
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP", "atomic_swap_global", i32>; | ||
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_global", i32, v2i32>; | ||
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>; | ||
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>; | ||
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "atomic_load_usub_sat_global", i32>; | ||
|
||
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in | ||
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>; | ||
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_CSUB", "atomic_load_usub_sat_global", i32>; | ||
|
||
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>; | ||
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>; | ||
|
@@ -1808,10 +1812,8 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_ | |
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i64>; | ||
|
||
let SubtargetPredicate = isGFX12Plus in { | ||
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "global_addrspace", i32>; | ||
|
||
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in | ||
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "global_addrspace", i32>; | ||
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_COND_SUB_U32", "atomic_load_usub_cond_global", i32>; | ||
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_COND_SUB_U32", "atomic_load_usub_cond_global", i32>; | ||
} | ||
|
||
let OtherPredicates = [isGFX12Plus] in { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2191,6 +2191,14 @@ R600TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { | |
// FIXME: Cayman at least appears to have instructions for this, but the | ||
// instruction defintions appear to be missing. | ||
return AtomicExpansionKind::CmpXChg; | ||
case AtomicRMWInst::USubCond: | ||
case AtomicRMWInst::USubSat: | ||
if (auto *IntTy = dyn_cast<IntegerType>(RMW->getType())) { | ||
unsigned Size = IntTy->getBitWidth(); | ||
if (Size == 32) | ||
return AtomicExpansionKind::None; | ||
} | ||
return AtomicExpansionKind::CmpXChg; | ||
Comment on lines
+2196
to
+2201
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This isn't tested? I'm also assuming r600 didn't have these and should just return cmpxchg |
||
case AtomicRMWInst::Xchg: { | ||
const DataLayout &DL = RMW->getFunction()->getDataLayout(); | ||
unsigned ValSize = DL.getTypeSizeInBits(RMW->getType()); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -994,6 +994,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |
ISD::ATOMIC_LOAD_FMAX, | ||
ISD::ATOMIC_LOAD_UINC_WRAP, | ||
ISD::ATOMIC_LOAD_UDEC_WRAP, | ||
ISD::ATOMIC_LOAD_USUB_COND, | ||
ISD::ATOMIC_LOAD_USUB_SAT, | ||
ISD::INTRINSIC_VOID, | ||
ISD::INTRINSIC_W_CHAIN}); | ||
|
||
|
@@ -16806,10 +16808,10 @@ static bool isV2BF16(Type *Ty) { | |
} | ||
|
||
/// \return true if atomicrmw integer ops work for the type. | ||
static bool isAtomicRMWLegalIntTy(Type *Ty) { | ||
static bool isAtomicRMWLegalIntTy(Type *Ty, bool Allow64 = true) { | ||
if (auto *IT = dyn_cast<IntegerType>(Ty)) { | ||
unsigned BW = IT->getBitWidth(); | ||
return BW == 32 || BW == 64; | ||
return BW == 32 || (BW == 64 && Allow64); | ||
} | ||
Comment on lines
+16811
to
16815
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would just leave this alone and handle the one special case separately |
||
|
||
return false; | ||
|
@@ -16861,8 +16863,8 @@ static bool globalMemoryFPAtomicIsLegal(const GCNSubtarget &Subtarget, | |
|
||
/// \return Action to perform on AtomicRMWInsts for integer operations. | ||
static TargetLowering::AtomicExpansionKind | ||
atomicSupportedIfLegalIntType(const AtomicRMWInst *RMW) { | ||
return isAtomicRMWLegalIntTy(RMW->getType()) | ||
atomicSupportedIfLegalIntType(const AtomicRMWInst *RMW, bool Allow64 = true) { | ||
return isAtomicRMWLegalIntTy(RMW->getType(), Allow64) | ||
? TargetLowering::AtomicExpansionKind::None | ||
: TargetLowering::AtomicExpansionKind::CmpXChg; | ||
} | ||
|
@@ -16931,6 +16933,9 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { | |
case AtomicRMWInst::UIncWrap: | ||
case AtomicRMWInst::UDecWrap: | ||
return atomicSupportedIfLegalIntType(RMW); | ||
case AtomicRMWInst::USubCond: | ||
case AtomicRMWInst::USubSat: | ||
return atomicSupportedIfLegalIntType(RMW, false); | ||
case AtomicRMWInst::Sub: | ||
case AtomicRMWInst::Or: | ||
case AtomicRMWInst::Xor: { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These should be tested in one of the llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-* tests