Skip to content

Commit 836467b

Browse files
committed
[llvm][aarch64] Add support for the MS qualifiers __ptr32, __ptr64, __sptr, __uptr
1 parent aa70d84 commit 836467b

File tree

5 files changed

+279
-11
lines changed

5 files changed

+279
-11
lines changed

llvm/lib/Target/AArch64/AArch64.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,11 @@ void initializeSMEABIPass(PassRegistry &);
114114
void initializeSMEPeepholeOptPass(PassRegistry &);
115115
void initializeSVEIntrinsicOptsPass(PassRegistry &);
116116
void initializeAArch64Arm64ECCallLoweringPass(PassRegistry &);
117+
118+
/// Address-space numbers used for the Microsoft pointer-size qualifiers
/// (`__ptr32`, `__ptr64`, `__sptr`, `__uptr`).
namespace ARM64AS {
enum : unsigned {
  /// 32-bit pointer that is sign-extended when widened to 64 bits
  /// (`__ptr32 __sptr`).
  PTR32_SPTR = 270,
  /// 32-bit pointer that is zero-extended when widened to 64 bits
  /// (`__ptr32 __uptr`).
  PTR32_UPTR = 271,
  /// 64-bit pointer address space (presumably `__ptr64` -- confirm against
  /// the front end).
  PTR64 = 272
};
} // end namespace ARM64AS
121+
117122
} // end namespace llvm
118123

119124
#endif

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 79 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
533533
setOperationAction(ISD::XOR, MVT::i32, Custom);
534534
setOperationAction(ISD::XOR, MVT::i64, Custom);
535535

536+
setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
537+
setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
538+
536539
// Virtually no operation on f128 is legal, but LLVM can't expand them when
537540
// there's a valid register class, so we need custom operations in most cases.
538541
setOperationAction(ISD::FABS, MVT::f128, Expand);
@@ -6722,6 +6725,37 @@ static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
67226725
ST->getBasePtr(), ST->getMemOperand());
67236726
}
67246727

6728+
/// Custom lowering for ISD::ADDRSPACECAST between address spaces whose
/// pointers have different sizes.
///
/// \param Op  The ADDRSPACECAST node; operand 0 is the pointer being cast.
/// \param DAG The SelectionDAG used to build the replacement nodes.
/// \returns   A sign extension when casting from ARM64AS::PTR32_SPTR, a zero
///            extension when casting from ARM64AS::PTR32_UPTR, a value
///            converted to the destination type when casting to either 32-bit
///            address space, and the unmodified source otherwise.
static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  SDValue Src = Op.getOperand(0);
  MVT DestVT = Op.getSimpleValueType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  auto *N = cast<AddrSpaceCastSDNode>(Op.getNode());

  unsigned SrcAS = N->getSrcAddressSpace();
  unsigned DestAS = N->getDestAddressSpace();
  assert(SrcAS != DestAS &&
         "addrspacecast must be between different address spaces");
  assert(TLI.getTargetMachine().getPointerSize(SrcAS) !=
             TLI.getTargetMachine().getPointerSize(DestAS) &&
         "addrspacecast must be between different ptr sizes");
  // TLI is only read by the assertion above; without this, builds that compile
  // assertions out report it as an unused variable.
  (void)TLI;

  // Widening from a 32-bit pointer: the source address space selects the kind
  // of extension. Both extension opcodes take exactly one operand.
  if (SrcAS == ARM64AS::PTR32_SPTR)
    return DAG.getNode(ISD::SIGN_EXTEND, DL, DestVT, Src);
  if (SrcAS == ARM64AS::PTR32_UPTR)
    return DAG.getNode(ISD::ZERO_EXTEND, DL, DestVT, Src);

  // Narrowing to either 32-bit pointer address space.
  if (DestAS == ARM64AS::PTR32_SPTR || DestAS == ARM64AS::PTR32_UPTR) {
    SDValue Ext = DAG.getAnyExtOrTrunc(Src, DL, DestVT);
    return DAG.getZeroExtendInReg(Ext, DL, DestVT);
  }

  // Neither side is a 32-bit pointer address space: pass the value through.
  return Src;
}
6758+
67256759
// Custom lowering for any store, vector or scalar and/or default or with
67266760
// a truncate operations. Currently only custom lower truncate operation
67276761
// from vector v4i16 to v4i8 or volatile stores of i128.
@@ -7375,6 +7409,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
73757409
case ISD::SIGN_EXTEND:
73767410
case ISD::ZERO_EXTEND:
73777411
return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
7412+
case ISD::ADDRSPACECAST:
7413+
return LowerADDRSPACECAST(Op, DAG);
73787414
case ISD::SIGN_EXTEND_INREG: {
73797415
// Only custom lower when ExtraVT has a legal byte based element type.
73807416
EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -23361,6 +23397,26 @@ static SDValue performLOADCombine(SDNode *N,
2336123397
performTBISimplification(N->getOperand(1), DCI, DAG);
2336223398

2336323399
LoadSDNode *LD = cast<LoadSDNode>(N);
23400+
EVT RegVT = LD->getValueType(0);
23401+
EVT MemVT = LD->getMemoryVT();
23402+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23403+
SDLoc DL(LD);
23404+
23405+
// Cast ptr32 and ptr64 pointers to the default address space before a load.
23406+
unsigned AddrSpace = LD->getAddressSpace();
23407+
if (AddrSpace == ARM64AS::PTR64 || AddrSpace == ARM64AS::PTR32_SPTR ||
23408+
AddrSpace == ARM64AS::PTR32_UPTR) {
23409+
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
23410+
if (PtrVT != LD->getBasePtr().getSimpleValueType()) {
23411+
SDValue Cast =
23412+
DAG.getAddrSpaceCast(DL, PtrVT, LD->getBasePtr(), AddrSpace, 0);
23413+
return DAG.getExtLoad(LD->getExtensionType(), DL, RegVT, LD->getChain(),
23414+
Cast, LD->getPointerInfo(), MemVT,
23415+
LD->getOriginalAlign(),
23416+
LD->getMemOperand()->getFlags());
23417+
}
23418+
}
23419+
2336423420
if (LD->isVolatile() || !Subtarget->isLittleEndian())
2336523421
return SDValue(N, 0);
2336623422

@@ -23370,13 +23426,11 @@ static SDValue performLOADCombine(SDNode *N,
2337023426
if (!LD->isNonTemporal())
2337123427
return SDValue(N, 0);
2337223428

23373-
EVT MemVT = LD->getMemoryVT();
2337423429
if (MemVT.isScalableVector() || MemVT.getSizeInBits() <= 256 ||
2337523430
MemVT.getSizeInBits() % 256 == 0 ||
2337623431
256 % MemVT.getScalarSizeInBits() != 0)
2337723432
return SDValue(N, 0);
2337823433

23379-
SDLoc DL(LD);
2338023434
SDValue Chain = LD->getChain();
2338123435
SDValue BasePtr = LD->getBasePtr();
2338223436
SDNodeFlags Flags = LD->getFlags();
@@ -23636,12 +23690,28 @@ static SDValue performSTORECombine(SDNode *N,
2363623690
SDValue Value = ST->getValue();
2363723691
SDValue Ptr = ST->getBasePtr();
2363823692
EVT ValueVT = Value.getValueType();
23693+
EVT MemVT = ST->getMemoryVT();
23694+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23695+
SDLoc DL(ST);
2363923696

2364023697
auto hasValidElementTypeForFPTruncStore = [](EVT VT) {
2364123698
EVT EltVT = VT.getVectorElementType();
2364223699
return EltVT == MVT::f32 || EltVT == MVT::f64;
2364323700
};
2364423701

23702+
// Cast ptr32 and ptr64 pointers to the default address space before a store.
23703+
unsigned AddrSpace = ST->getAddressSpace();
23704+
if (AddrSpace == ARM64AS::PTR64 || AddrSpace == ARM64AS::PTR32_SPTR ||
23705+
AddrSpace == ARM64AS::PTR32_UPTR) {
23706+
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
23707+
if (PtrVT != Ptr.getSimpleValueType()) {
23708+
SDValue Cast = DAG.getAddrSpaceCast(DL, PtrVT, Ptr, AddrSpace, 0);
23709+
return DAG.getStore(Chain, DL, Value, Cast, ST->getPointerInfo(),
23710+
ST->getOriginalAlign(),
23711+
ST->getMemOperand()->getFlags(), ST->getAAInfo());
23712+
}
23713+
}
23714+
2364523715
if (SDValue Res = combineI8TruncStore(ST, DAG, Subtarget))
2364623716
return Res;
2364723717

@@ -23655,8 +23725,8 @@ static SDValue performSTORECombine(SDNode *N,
2365523725
ValueVT.isFixedLengthVector() &&
2365623726
ValueVT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits() &&
2365723727
hasValidElementTypeForFPTruncStore(Value.getOperand(0).getValueType()))
23658-
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
23659-
ST->getMemoryVT(), ST->getMemOperand());
23728+
return DAG.getTruncStore(Chain, DL, Value.getOperand(0), Ptr, MemVT,
23729+
ST->getMemOperand());
2366023730

2366123731
if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
2366223732
return Split;
@@ -26983,6 +27053,11 @@ void AArch64TargetLowering::ReplaceNodeResults(
2698327053
ReplaceATOMIC_LOAD_128Results(N, Results, DAG, Subtarget);
2698427054
return;
2698527055
}
27056+
case ISD::ADDRSPACECAST: {
27057+
SDValue V = LowerADDRSPACECAST(SDValue(N, 0), DAG);
27058+
Results.push_back(V);
27059+
return;
27060+
}
2698627061
case ISD::ATOMIC_LOAD:
2698727062
case ISD::LOAD: {
2698827063
MemSDNode *LoadNode = cast<MemSDNode>(N);

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/CodeGen/TargetLowering.h"
2121
#include "llvm/IR/CallingConv.h"
2222
#include "llvm/IR/Instruction.h"
23+
#include "AArch64.h"
2324

2425
namespace llvm {
2526

@@ -585,11 +586,19 @@ class AArch64TargetLowering : public TargetLowering {
585586
unsigned Depth) const override;
586587

587588
/// Returns the DAG value type used for pointers in address space \p AS.
///
/// \param DL Data layout of the module (not consulted here).
/// \param AS Address space of the pointer; defaults to 0.
/// \returns  MVT::i32 for ARM64AS::PTR32_SPTR and ARM64AS::PTR32_UPTR,
///           MVT::i64 for every other address space.
MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
  // These are 32-bit pointers created using the `__ptr32` extension or
  // similar. They are handled by marking them as being in a different
  // address space, and will be extended to 64-bits when used as the target
  // of a load or store operation, or cast to a 64-bit pointer type.
  if (AS == ARM64AS::PTR32_SPTR || AS == ARM64AS::PTR32_UPTR)
    return MVT::i32;

  // Returning i64 for all other address spaces (i.e. even for ILP32) means
  // that the *DAG* representation of those pointers will always be 64-bits.
  // They will be truncated and extended when transferred to memory, but the
  // 64-bit DAG allows us to use AArch64's addressing modes much more easily.
  return MVT::i64;
}
594603

595604
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,

llvm/lib/Target/AArch64/AArch64TargetMachine.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,7 @@ class AArch64TargetMachine : public LLVMTargetMachine {
6565

6666
/// Returns true if a cast between SrcAS and DestAS is a noop.
6767
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
68-
// Addrspacecasts are always noops.
69-
return true;
68+
return (getPointerSize(SrcAS) == getPointerSize(DestAS));
7069
}
7170

7271
private:
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s | FileCheck %s
3+
4+
; Source to regenerate:
5+
; struct Foo {
6+
; int * __ptr32 p32;
7+
; int * __ptr64 p64;
8+
; __attribute__((address_space(9))) int *p_other;
9+
; };
10+
; extern "C" void use_foo(Foo *f);
11+
; extern "C" int use_int(int i);
12+
; extern "C" void test_sign_ext(Foo *f, int * __ptr32 __sptr i) {
13+
; f->p64 = i;
14+
; use_foo(f);
15+
; }
16+
; extern "C" void test_sign_ext_store_load(int * __ptr32 __sptr i) {
17+
; *i = use_int(*i);
18+
; }
19+
; extern "C" void test_zero_ext(Foo *f, int * __ptr32 __uptr i) {
20+
; f->p64 = i;
21+
; use_foo(f);
22+
; }
23+
; extern "C" void test_zero_ext_store_load(int * __ptr32 __uptr i) {
24+
; *i = use_int(*i);
25+
; }
26+
; extern "C" void test_trunc(Foo *f, int * __ptr64 i) {
27+
; f->p32 = i;
28+
; use_foo(f);
29+
; }
30+
; extern "C" void test_noop1(Foo *f, int * __ptr32 i) {
31+
; f->p32 = i;
32+
; use_foo(f);
33+
; }
34+
; extern "C" void test_noop2(Foo *f, int * __ptr64 i) {
35+
; f->p64 = i;
36+
; use_foo(f);
37+
; }
38+
; extern "C" void test_null_arg(Foo *f, int * __ptr32 i) {
39+
; test_noop1(f, 0);
40+
; }
41+
; extern "C" void test_unrecognized(Foo *f, __attribute__((address_space(14))) int *i) {
42+
; f->p32 = (int * __ptr32)i;
43+
; use_foo(f);
44+
; }
45+
;
46+
; $ clang --target=aarch64-windows-msvc -fms-extensions -O2 -S -emit-llvm t.cpp
47+
48+
target datalayout = "e-m:w-p:64:64-i32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
49+
target triple = "aarch64-unknown-windows-msvc"
50+
51+
; Function Attrs: mustprogress uwtable
52+
define dso_local void @test_sign_ext(ptr noundef %f, ptr addrspace(270) noundef %i) local_unnamed_addr #0 {
53+
; CHECK-LABEL: test_sign_ext:
54+
; CHECK: // %bb.0: // %entry
55+
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
56+
; CHECK-NEXT: sxtw x8, w1
57+
; CHECK-NEXT: str x8, [x0, #8]
58+
; CHECK-NEXT: b use_foo
59+
entry:
60+
%0 = addrspacecast ptr addrspace(270) %i to ptr
61+
%p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
62+
store ptr %0, ptr %p64, align 8
63+
tail call void @use_foo(ptr noundef %f)
64+
ret void
65+
}
66+
67+
declare dso_local void @use_foo(ptr noundef) local_unnamed_addr #1
68+
69+
; Function Attrs: mustprogress uwtable
70+
define dso_local void @test_sign_ext_store_load(ptr addrspace(270) nocapture noundef %i) local_unnamed_addr #0 {
71+
; CHECK-LABEL: test_sign_ext_store_load:
72+
; CHECK: // %bb.0: // %entry
73+
; CHECK: sxtw x19, w0
74+
; CHECK-NEXT: ldr w0, [x19]
75+
; CHECK-NEXT: bl use_int
76+
; CHECK-NEXT: str w0, [x19]
77+
entry:
78+
%0 = load i32, ptr addrspace(270) %i, align 4
79+
%call = tail call i32 @use_int(i32 noundef %0)
80+
store i32 %call, ptr addrspace(270) %i, align 4
81+
ret void
82+
}
83+
84+
declare dso_local i32 @use_int(i32 noundef) local_unnamed_addr #1
85+
86+
; Function Attrs: mustprogress uwtable
87+
define dso_local void @test_zero_ext(ptr noundef %f, ptr addrspace(271) noundef %i) local_unnamed_addr #0 {
88+
; CHECK-LABEL: test_zero_ext:
89+
; CHECK: // %bb.0: // %entry
90+
; CHECK-NEXT: mov w8, w1
91+
; CHECK-NEXT: str x8, [x0, #8]
92+
; CHECK-NEXT: b use_foo
93+
entry:
94+
%0 = addrspacecast ptr addrspace(271) %i to ptr
95+
%p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
96+
store ptr %0, ptr %p64, align 8
97+
tail call void @use_foo(ptr noundef %f)
98+
ret void
99+
}
100+
101+
; Function Attrs: mustprogress uwtable
102+
define dso_local void @test_zero_ext_store_load(ptr addrspace(271) nocapture noundef %i) local_unnamed_addr #0 {
103+
; CHECK-LABEL: test_zero_ext_store_load:
104+
; CHECK: // %bb.0: // %entry
105+
; CHECK: mov w19, w0
106+
; CHECK-NEXT: ldr w0, [x19]
107+
; CHECK-NEXT: bl use_int
108+
; CHECK-NEXT: str w0, [x19]
109+
entry:
110+
%0 = load i32, ptr addrspace(271) %i, align 4
111+
%call = tail call i32 @use_int(i32 noundef %0)
112+
store i32 %call, ptr addrspace(271) %i, align 4
113+
ret void
114+
}
115+
116+
; Function Attrs: mustprogress uwtable
117+
define dso_local void @test_trunc(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 {
118+
; CHECK-LABEL: test_trunc:
119+
; CHECK: // %bb.0: // %entry
120+
; CHECK-NEXT: str w1, [x0]
121+
; CHECK-NEXT: b use_foo
122+
entry:
123+
%0 = addrspacecast ptr %i to ptr addrspace(270)
124+
store ptr addrspace(270) %0, ptr %f, align 8
125+
tail call void @use_foo(ptr noundef nonnull %f)
126+
ret void
127+
}
128+
129+
; Function Attrs: mustprogress uwtable
130+
define dso_local void @test_noop1(ptr noundef %f, ptr addrspace(270) noundef %i) local_unnamed_addr #0 {
131+
; CHECK-LABEL: test_noop1:
132+
; CHECK: // %bb.0: // %entry
133+
; CHECK-NEXT: str w1, [x0]
134+
; CHECK-NEXT: b use_foo
135+
entry:
136+
store ptr addrspace(270) %i, ptr %f, align 8
137+
tail call void @use_foo(ptr noundef nonnull %f)
138+
ret void
139+
}
140+
141+
; Function Attrs: mustprogress uwtable
142+
define dso_local void @test_noop2(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 {
143+
; CHECK-LABEL: test_noop2:
144+
; CHECK: // %bb.0: // %entry
145+
; CHECK-NEXT: str x1, [x0, #8]
146+
; CHECK-NEXT: b use_foo
147+
entry:
148+
%p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
149+
store ptr %i, ptr %p64, align 8
150+
tail call void @use_foo(ptr noundef %f)
151+
ret void
152+
}
153+
154+
; Function Attrs: mustprogress uwtable
155+
define dso_local void @test_null_arg(ptr noundef %f, ptr addrspace(270) nocapture noundef readnone %i) local_unnamed_addr #0 {
156+
; CHECK-LABEL: test_null_arg:
157+
; CHECK: // %bb.0: // %entry
158+
; CHECK-NEXT: str wzr, [x0]
159+
; CHECK-NEXT: b use_foo
160+
entry:
161+
store ptr addrspace(270) null, ptr %f, align 8
162+
tail call void @use_foo(ptr noundef nonnull %f)
163+
ret void
164+
}
165+
166+
; Function Attrs: mustprogress uwtable
167+
define dso_local void @test_unrecognized(ptr noundef %f, ptr addrspace(14) noundef %i) local_unnamed_addr #0 {
168+
; CHECK-LABEL: test_unrecognized:
169+
; CHECK: // %bb.0: // %entry
170+
; CHECK-NEXT: str w1, [x0]
171+
; CHECK-NEXT: b use_foo
172+
entry:
173+
%0 = addrspacecast ptr addrspace(14) %i to ptr addrspace(270)
174+
store ptr addrspace(270) %0, ptr %f, align 8
175+
tail call void @use_foo(ptr noundef nonnull %f)
176+
ret void
177+
}
178+
179+
attributes #0 = { mustprogress uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }
180+
attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }

0 commit comments

Comments
 (0)