Skip to content

Commit 4d7df43

Browse files
committed
[AArch64] Out-of-line atomics (-moutline-atomics) implementation.
This patch implements the out-of-line atomics mechanism for LSE deployment. Details of how it works can be found in llvm/docs/Atomics.rst. The options -moutline-atomics and -mno-outline-atomics were added to the clang driver to enable and disable it. This is the clang and llvm part of the out-of-line atomics interface; the library part is already supported by libgcc. Compiler-rt support is provided in a separate patch. Differential Revision: https://reviews.llvm.org/D91157
1 parent 1285781 commit 4d7df43

22 files changed

+4938
-15
lines changed

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,10 @@ def warn_drv_moutline_unsupported_opt : Warning<
491491
"The '%0' architecture does not support -moutline; flag ignored">,
492492
InGroup<OptionIgnored>;
493493

494+
def warn_drv_moutline_atomics_unsupported_opt : Warning<
495+
"The '%0' architecture does not support -moutline-atomics; flag ignored">,
496+
InGroup<OptionIgnored>;
497+
494498
def warn_drv_darwin_sdk_invalid_settings : Warning<
495499
"SDK settings were ignored as 'SDKSettings.json' could not be parsed">,
496500
InGroup<DiagGroup<"darwin-sdk-settings">>;

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2664,6 +2664,10 @@ def msmall_data_threshold_EQ : Joined <["-"], "msmall-data-threshold=">,
26642664
Group<m_Group>, Alias<G>;
26652665
def msoft_float : Flag<["-"], "msoft-float">, Group<m_Group>, Flags<[CC1Option]>,
26662666
HelpText<"Use software floating point">;
2667+
def moutline_atomics : Flag<["-"], "moutline-atomics">, Group<f_clang_Group>, Flags<[CC1Option]>,
2668+
HelpText<"Generate local calls to out-of-line atomic operations">;
2669+
def mno_outline_atomics : Flag<["-"], "mno-outline-atomics">, Group<f_clang_Group>, Flags<[CC1Option]>,
2670+
HelpText<"Don't generate local calls to out-of-line atomic operations">;
26672671
def mno_implicit_float : Flag<["-"], "mno-implicit-float">, Group<m_Group>,
26682672
HelpText<"Don't generate implicit floating point instructions">;
26692673
def mimplicit_float : Flag<["-"], "mimplicit-float">, Group<m_Group>;

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6367,6 +6367,23 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
63676367
}
63686368
}
63696369

6370+
if (Arg *A = Args.getLastArg(options::OPT_moutline_atomics,
6371+
options::OPT_mno_outline_atomics)) {
6372+
if (A->getOption().matches(options::OPT_moutline_atomics)) {
6373+
// Option -moutline-atomics supported for AArch64 target only.
6374+
if (!Triple.isAArch64()) {
6375+
D.Diag(diag::warn_drv_moutline_atomics_unsupported_opt)
6376+
<< Triple.getArchName();
6377+
} else {
6378+
CmdArgs.push_back("-target-feature");
6379+
CmdArgs.push_back("+outline-atomics");
6380+
}
6381+
} else {
6382+
CmdArgs.push_back("-target-feature");
6383+
CmdArgs.push_back("-outline-atomics");
6384+
}
6385+
}
6386+
63706387
if (Args.hasFlag(options::OPT_faddrsig, options::OPT_fno_addrsig,
63716388
(TC.getTriple().isOSBinFormatELF() ||
63726389
TC.getTriple().isOSBinFormatCOFF()) &&

llvm/docs/Atomics.rst

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,3 +621,23 @@ fence on either side of a normal load or store.)
621621
There's also, somewhat separately, the possibility to lower ``ATOMIC_FENCE`` to
622622
``__sync_synchronize()``. This may happen or not happen independent of all the
623623
above, controlled purely by ``setOperationAction(ISD::ATOMIC_FENCE, ...)``.
624+
625+
On AArch64, a variant of the __sync_* routines is used which contains the memory
626+
order as part of the function name. These routines may determine at runtime
627+
whether the single-instruction atomic operations which were introduced as part
628+
of AArch64 Large System Extensions "LSE" instruction set are available, or if
629+
they need to fall back to an LL/SC loop. The following helper functions are
630+
implemented in both ``compiler-rt`` and ``libgcc`` libraries
631+
(``N`` is one of 1, 2, 4, 8, and ``M`` is one of 1, 2, 4, 8 and 16, and
632+
``ORDER`` is one of 'relax', 'acq', 'rel', 'acq_rel')::
633+
634+
iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
635+
iN __aarch64_swpN_ORDER(iN val, iN *ptr)
636+
iN __aarch64_ldaddN_ORDER(iN val, iN *ptr)
637+
iN __aarch64_ldclrN_ORDER(iN val, iN *ptr)
638+
iN __aarch64_ldeorN_ORDER(iN val, iN *ptr)
639+
iN __aarch64_ldsetN_ORDER(iN val, iN *ptr)
640+
641+
Please note that if the LSE instruction set is specified for the AArch64 target then
642+
out-of-line atomics calls are not generated and single-instruction atomic
643+
operations are used in place.

llvm/include/llvm/CodeGen/RuntimeLibcalls.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define LLVM_CODEGEN_RUNTIMELIBCALLS_H
1616

1717
#include "llvm/CodeGen/ValueTypes.h"
18+
#include "llvm/Support/AtomicOrdering.h"
1819

1920
namespace llvm {
2021
namespace RTLIB {
@@ -60,6 +61,10 @@ namespace RTLIB {
6061
/// UNKNOWN_LIBCALL if there is none.
6162
Libcall getSYNC(unsigned Opc, MVT VT);
6263

64+
/// Return the outline atomics value for the given opcode, atomic ordering
65+
/// and type, or UNKNOWN_LIBCALL if there is none.
66+
Libcall getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, MVT VT);
67+
6368
/// getMEMCPY_ELEMENT_UNORDERED_ATOMIC - Return
6469
/// MEMCPY_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or
6570
/// UNKNOWN_LIBCALL if there is none.

llvm/include/llvm/IR/RuntimeLibcalls.def

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,23 @@ HANDLE_LIBCALL(ATOMIC_FETCH_NAND_4, "__atomic_fetch_nand_4")
545545
HANDLE_LIBCALL(ATOMIC_FETCH_NAND_8, "__atomic_fetch_nand_8")
546546
HANDLE_LIBCALL(ATOMIC_FETCH_NAND_16, "__atomic_fetch_nand_16")
547547

548+
// Out-of-line atomics libcalls
549+
#define HLCALLS(A, N) \
550+
HANDLE_LIBCALL(A##N##_RELAX, nullptr) \
551+
HANDLE_LIBCALL(A##N##_ACQ, nullptr) \
552+
HANDLE_LIBCALL(A##N##_REL, nullptr) \
553+
HANDLE_LIBCALL(A##N##_ACQ_REL, nullptr)
554+
#define HLCALL5(A) \
555+
HLCALLS(A, 1) HLCALLS(A, 2) HLCALLS(A, 4) HLCALLS(A, 8) HLCALLS(A, 16)
556+
HLCALL5(OUTLINE_ATOMIC_CAS)
557+
HLCALL5(OUTLINE_ATOMIC_SWP)
558+
HLCALL5(OUTLINE_ATOMIC_LDADD)
559+
HLCALL5(OUTLINE_ATOMIC_LDSET)
560+
HLCALL5(OUTLINE_ATOMIC_LDCLR)
561+
HLCALL5(OUTLINE_ATOMIC_LDEOR)
562+
#undef HLCALLS
563+
#undef HLCALL5
564+
548565
// Stack Protector Fail
549566
HANDLE_LIBCALL(STACKPROTECTOR_CHECK_FAIL, "__stack_chk_fail")
550567

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4052,12 +4052,23 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
40524052
case ISD::ATOMIC_LOAD_UMAX:
40534053
case ISD::ATOMIC_CMP_SWAP: {
40544054
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
4055-
RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
4056-
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
4057-
4055+
AtomicOrdering Order = cast<AtomicSDNode>(Node)->getOrdering();
4056+
RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, Order, VT);
40584057
EVT RetVT = Node->getValueType(0);
4059-
SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
40604058
TargetLowering::MakeLibCallOptions CallOptions;
4059+
SmallVector<SDValue, 4> Ops;
4060+
if (TLI.getLibcallName(LC)) {
4061+
// If outline atomic available, prepare its arguments and expand.
4062+
Ops.append(Node->op_begin() + 2, Node->op_end());
4063+
Ops.push_back(Node->getOperand(1));
4064+
4065+
} else {
4066+
LC = RTLIB::getSYNC(Opc, VT);
4067+
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
4068+
"Unexpected atomic op or value type!");
4069+
// Arguments for expansion to sync libcall
4070+
Ops.append(Node->op_begin() + 1, Node->op_end());
4071+
}
40614072
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
40624073
Ops, CallOptions,
40634074
SDLoc(Node),

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2169,12 +2169,22 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
21692169
std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
21702170
unsigned Opc = Node->getOpcode();
21712171
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
2172-
RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
2173-
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
2174-
2172+
AtomicOrdering order = cast<AtomicSDNode>(Node)->getOrdering();
2173+
// Lower to outline atomic libcall if outline atomics enabled,
2174+
// or to sync libcall otherwise
2175+
RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, order, VT);
21752176
EVT RetVT = Node->getValueType(0);
2176-
SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
21772177
TargetLowering::MakeLibCallOptions CallOptions;
2178+
SmallVector<SDValue, 4> Ops;
2179+
if (TLI.getLibcallName(LC)) {
2180+
Ops.append(Node->op_begin() + 2, Node->op_end());
2181+
Ops.push_back(Node->getOperand(1));
2182+
} else {
2183+
LC = RTLIB::getSYNC(Opc, VT);
2184+
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
2185+
"Unexpected atomic op or value type!");
2186+
Ops.append(Node->op_begin() + 1, Node->op_end());
2187+
}
21782188
return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node),
21792189
Node->getOperand(0));
21802190
}

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,83 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
448448
return UNKNOWN_LIBCALL;
449449
}
450450

451+
RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
452+
MVT VT) {
453+
unsigned ModeN, ModelN;
454+
switch (VT.SimpleTy) {
455+
case MVT::i8:
456+
ModeN = 0;
457+
break;
458+
case MVT::i16:
459+
ModeN = 1;
460+
break;
461+
case MVT::i32:
462+
ModeN = 2;
463+
break;
464+
case MVT::i64:
465+
ModeN = 3;
466+
break;
467+
case MVT::i128:
468+
ModeN = 4;
469+
break;
470+
default:
471+
return UNKNOWN_LIBCALL;
472+
}
473+
474+
switch (Order) {
475+
case AtomicOrdering::Monotonic:
476+
ModelN = 0;
477+
break;
478+
case AtomicOrdering::Acquire:
479+
ModelN = 1;
480+
break;
481+
case AtomicOrdering::Release:
482+
ModelN = 2;
483+
break;
484+
case AtomicOrdering::AcquireRelease:
485+
case AtomicOrdering::SequentiallyConsistent:
486+
ModelN = 3;
487+
break;
488+
default:
489+
return UNKNOWN_LIBCALL;
490+
}
491+
492+
#define LCALLS(A, B) \
493+
{ A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
494+
#define LCALL5(A) \
495+
LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
496+
switch (Opc) {
497+
case ISD::ATOMIC_CMP_SWAP: {
498+
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_CAS)};
499+
return LC[ModeN][ModelN];
500+
}
501+
case ISD::ATOMIC_SWAP: {
502+
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_SWP)};
503+
return LC[ModeN][ModelN];
504+
}
505+
case ISD::ATOMIC_LOAD_ADD: {
506+
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDADD)};
507+
return LC[ModeN][ModelN];
508+
}
509+
case ISD::ATOMIC_LOAD_OR: {
510+
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDSET)};
511+
return LC[ModeN][ModelN];
512+
}
513+
case ISD::ATOMIC_LOAD_CLR: {
514+
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDCLR)};
515+
return LC[ModeN][ModelN];
516+
}
517+
case ISD::ATOMIC_LOAD_XOR: {
518+
const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDEOR)};
519+
return LC[ModeN][ModelN];
520+
}
521+
default:
522+
return UNKNOWN_LIBCALL;
523+
}
524+
#undef LCALLS
525+
#undef LCALL5
526+
}
527+
451528
RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
452529
#define OP_TO_LIBCALL(Name, Enum) \
453530
case Name: \

llvm/lib/Target/AArch64/AArch64.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
6161
def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
6262
"Enable ARMv8.1 Large System Extension (LSE) atomic instructions">;
6363

64+
def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true",
65+
"Enable out of line atomics to support LSE instructions">;
66+
6467
def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true",
6568
"Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">;
6669

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 72 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -662,6 +662,57 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
662662
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
663663
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
664664

665+
// Generate outline atomics library calls only if LSE was not specified for
666+
// subtarget
667+
if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
668+
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
669+
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
670+
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
671+
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
672+
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
673+
setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
674+
setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
675+
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
676+
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
677+
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
678+
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
679+
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
680+
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
681+
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
682+
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
683+
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
684+
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
685+
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
686+
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
687+
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
688+
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
689+
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
690+
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
691+
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
692+
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
693+
#define LCALLNAMES(A, B, N) \
694+
setLibcallName(A##N##_RELAX, #B #N "_relax"); \
695+
setLibcallName(A##N##_ACQ, #B #N "_acq"); \
696+
setLibcallName(A##N##_REL, #B #N "_rel"); \
697+
setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
698+
#define LCALLNAME4(A, B) \
699+
LCALLNAMES(A, B, 1) \
700+
LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
701+
#define LCALLNAME5(A, B) \
702+
LCALLNAMES(A, B, 1) \
703+
LCALLNAMES(A, B, 2) \
704+
LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
705+
LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
706+
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
707+
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
708+
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
709+
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
710+
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
711+
#undef LCALLNAMES
712+
#undef LCALLNAME4
713+
#undef LCALLNAME5
714+
}
715+
665716
// 128-bit loads and stores can be done without expanding
666717
setOperationAction(ISD::LOAD, MVT::i128, Custom);
667718
setOperationAction(ISD::STORE, MVT::i128, Custom);
@@ -10078,7 +10129,7 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
1007810129
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
1007910130
SelectionDAG &DAG) const {
1008010131
auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
10081-
if (!Subtarget.hasLSE())
10132+
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
1008210133
return SDValue();
1008310134

1008410135
// LSE has an atomic load-add instruction, but not a load-sub.
@@ -10095,7 +10146,7 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
1009510146
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
1009610147
SelectionDAG &DAG) const {
1009710148
auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
10098-
if (!Subtarget.hasLSE())
10149+
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
1009910150
return SDValue();
1010010151

1010110152
// LSE has an atomic load-clear instruction, but not a load-and.
@@ -15549,7 +15600,7 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
1554915600
assert(N->getValueType(0) == MVT::i128 &&
1555015601
"AtomicCmpSwap on types less than 128 should be legal");
1555115602

15552-
if (Subtarget->hasLSE()) {
15603+
if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
1555315604
// LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
1555415605
// so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
1555515606
SDValue Ops[] = {
@@ -15778,14 +15829,30 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
1577815829
// Nand not supported in LSE.
1577915830
if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC;
1578015831
// Leave 128 bits to LLSC.
15781-
return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC;
15832+
if (Subtarget->hasLSE() && Size < 128)
15833+
return AtomicExpansionKind::None;
15834+
if (Subtarget->outlineAtomics() && Size < 128) {
15835+
// [U]Min/[U]Max RMW atomics are used in __sync_fetch_ libcalls so far.
15836+
// Don't outline them unless
15837+
// (1) high level <atomic> support approved:
15838+
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
15839+
// (2) low level libgcc and compiler-rt support implemented by:
15840+
// min/max outline atomics helpers
15841+
if (AI->getOperation() != AtomicRMWInst::Min &&
15842+
AI->getOperation() != AtomicRMWInst::Max &&
15843+
AI->getOperation() != AtomicRMWInst::UMin &&
15844+
AI->getOperation() != AtomicRMWInst::UMax) {
15845+
return AtomicExpansionKind::None;
15846+
}
15847+
}
15848+
return AtomicExpansionKind::LLSC;
1578215849
}
1578315850

1578415851
TargetLowering::AtomicExpansionKind
1578515852
AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
1578615853
AtomicCmpXchgInst *AI) const {
1578715854
// If subtarget has LSE, leave cmpxchg intact for codegen.
15788-
if (Subtarget->hasLSE())
15855+
if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
1578915856
return AtomicExpansionKind::None;
1579015857
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
1579115858
// implement cmpxchg without spilling. If the address being exchanged is also

llvm/lib/Target/AArch64/AArch64Subtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
195195
// Enable 64-bit vectorization in SLP.
196196
unsigned MinVectorRegisterBitWidth = 64;
197197

198+
bool OutlineAtomics = false;
198199
bool UseAA = false;
199200
bool PredictableSelectIsExpensive = false;
200201
bool BalanceFPOps = false;
@@ -471,6 +472,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
471472

472473
bool useAA() const override { return UseAA; }
473474

475+
bool outlineAtomics() const { return OutlineAtomics; }
476+
474477
bool hasVH() const { return HasVH; }
475478
bool hasPAN() const { return HasPAN; }
476479
bool hasLOR() const { return HasLOR; }

0 commit comments

Comments
 (0)