
Commit 4f4687d

[llvm][aarch64] Add support for the MS qualifiers __ptr32, __ptr64, __sptr, __uptr
Parent commit: 845cc96
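For context, a minimal sketch of what these Microsoft pointer qualifiers mean on AArch64, using the address-space mapping this commit introduces (270 = `__ptr32 __sptr`, 271 = `__ptr32 __uptr`, 272 = `__ptr64`). The function and variable names are illustrative only; the clang invocation is the one the new test cites:

// Illustrative sketch only; compile with:
//   clang --target=aarch64-windows-msvc -fms-extensions -O2 -S t.cpp
void sketch(int *__ptr32 __sptr s,  // 32-bit pointer, sign-extended when widened (addrspace 270)
            int *__ptr32 __uptr u,  // 32-bit pointer, zero-extended when widened (addrspace 271)
            int *__ptr64 p) {       // explicit 64-bit pointer                    (addrspace 272)
  int *a = s; // addrspacecast 270 -> 0: lowered to a sign extend (sxtw)
  int *b = u; // addrspacecast 271 -> 0: lowered to a zero extend (32-bit mov)
  int *c = p; // addrspacecast 272 -> 0: same pointer size, a no-op
  (void)a; (void)b; (void)c;
}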

File tree: 6 files changed, +282 −13 lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 79 additions & 4 deletions
@@ -530,6 +530,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::XOR, MVT::i32, Custom);
   setOperationAction(ISD::XOR, MVT::i64, Custom);
 
+  setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
+  setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
+
   // Virtually no operation on f128 is legal, but LLVM can't expand them when
   // there's a valid register class, so we need custom operations in most cases.
   setOperationAction(ISD::FABS, MVT::f128, Expand);
@@ -6880,6 +6883,37 @@ static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
                       ST->getBasePtr(), ST->getMemOperand());
 }
 
+static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) {
+  SDLoc dl(Op);
+  SDValue Src = Op.getOperand(0);
+  MVT DestVT = Op.getSimpleValueType();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
+
+  unsigned SrcAS = N->getSrcAddressSpace();
+  unsigned DestAS = N->getDestAddressSpace();
+  assert(SrcAS != DestAS &&
+         "addrspacecast must be between different address spaces");
+  assert(TLI.getTargetMachine().getPointerSize(SrcAS) !=
+             TLI.getTargetMachine().getPointerSize(DestAS) &&
+         "addrspacecast must be between different ptr sizes");
+
+  if (SrcAS == ARM64AS::PTR32_SPTR) {
+    return DAG.getNode(ISD::SIGN_EXTEND, dl, DestVT, Src,
+                       DAG.getTargetConstant(0, dl, DestVT));
+  } else if (SrcAS == ARM64AS::PTR32_UPTR) {
+    return DAG.getNode(ISD::ZERO_EXTEND, dl, DestVT, Src,
+                       DAG.getTargetConstant(0, dl, DestVT));
+  } else if ((DestAS == ARM64AS::PTR32_SPTR) ||
+             (DestAS == ARM64AS::PTR32_UPTR)) {
+    SDValue Ext = DAG.getAnyExtOrTrunc(Src, dl, DestVT);
+    SDValue Trunc = DAG.getZeroExtendInReg(Ext, dl, DestVT);
+    return Trunc;
+  } else {
+    return Src;
+  }
+}
+
 // Custom lowering for any store, vector or scalar and/or default or with
 // a truncate operations. Currently only custom lower truncate operation
 // from vector v4i16 to v4i8 or volatile stores of i128.
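As a plain-C++ mental model of the three cases `LowerADDRSPACECAST` distinguishes above — a hedged sketch that mirrors the extensions the DAG code selects, not the actual SelectionDAG nodes:

#include <cstdint>

// Widening a signed 32-bit pointer (__sptr, addrspace 270): SIGN_EXTEND.
uint64_t widen_sptr(uint32_t p) { return (uint64_t)(int64_t)(int32_t)p; }

// Widening an unsigned 32-bit pointer (__uptr, addrspace 271): ZERO_EXTEND.
uint64_t widen_uptr(uint32_t p) { return (uint64_t)p; }

// Narrowing to either 32-bit space: keep only the low 32 bits
// (any-extend/truncate followed by zero-extend-in-reg in the DAG code).
uint32_t narrow(uint64_t p) { return (uint32_t)p; }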
@@ -7541,6 +7575,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND:
     return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
+  case ISD::ADDRSPACECAST:
+    return LowerADDRSPACECAST(Op, DAG);
   case ISD::SIGN_EXTEND_INREG: {
     // Only custom lower when ExtraVT has a legal byte based element type.
     EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -23555,6 +23591,26 @@ static SDValue performLOADCombine(SDNode *N,
   performTBISimplification(N->getOperand(1), DCI, DAG);
 
   LoadSDNode *LD = cast<LoadSDNode>(N);
+  EVT RegVT = LD->getValueType(0);
+  EVT MemVT = LD->getMemoryVT();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDLoc DL(LD);
+
+  // Cast ptr32 and ptr64 pointers to the default address space before a load.
+  unsigned AddrSpace = LD->getAddressSpace();
+  if (AddrSpace == ARM64AS::PTR64 || AddrSpace == ARM64AS::PTR32_SPTR ||
+      AddrSpace == ARM64AS::PTR32_UPTR) {
+    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+    if (PtrVT != LD->getBasePtr().getSimpleValueType()) {
+      SDValue Cast =
+          DAG.getAddrSpaceCast(DL, PtrVT, LD->getBasePtr(), AddrSpace, 0);
+      return DAG.getExtLoad(LD->getExtensionType(), DL, RegVT, LD->getChain(),
+                            Cast, LD->getPointerInfo(), MemVT,
+                            LD->getOriginalAlign(),
+                            LD->getMemOperand()->getFlags());
+    }
+  }
+
   if (LD->isVolatile() || !Subtarget->isLittleEndian())
     return SDValue(N, 0);
 
@@ -23564,13 +23620,11 @@ static SDValue performLOADCombine(SDNode *N,
   if (!LD->isNonTemporal())
     return SDValue(N, 0);
 
-  EVT MemVT = LD->getMemoryVT();
   if (MemVT.isScalableVector() || MemVT.getSizeInBits() <= 256 ||
       MemVT.getSizeInBits() % 256 == 0 ||
       256 % MemVT.getScalarSizeInBits() != 0)
     return SDValue(N, 0);
 
-  SDLoc DL(LD);
   SDValue Chain = LD->getChain();
   SDValue BasePtr = LD->getBasePtr();
   SDNodeFlags Flags = LD->getFlags();
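The effect at the source level is visible in the new test later in this commit; for example, a load/store round-trip through a `__sptr` pointer (copied from the test's "Source to regenerate" section, with the expected codegen from its CHECK lines) sign-extends the 32-bit pointer once and then uses ordinary 64-bit addressing:

extern "C" int use_int(int i);
extern "C" void test_sign_ext_store_load(int * __ptr32 __sptr i) {
  *i = use_int(*i); // codegen: sxtw x19, w0 ; ldr w0, [x19] ; bl use_int ; str w0, [x19]
}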
@@ -23830,12 +23884,28 @@ static SDValue performSTORECombine(SDNode *N,
   SDValue Value = ST->getValue();
   SDValue Ptr = ST->getBasePtr();
   EVT ValueVT = Value.getValueType();
+  EVT MemVT = ST->getMemoryVT();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDLoc DL(ST);
 
   auto hasValidElementTypeForFPTruncStore = [](EVT VT) {
     EVT EltVT = VT.getVectorElementType();
     return EltVT == MVT::f32 || EltVT == MVT::f64;
   };
 
+  // Cast ptr32 and ptr64 pointers to the default address space before a store.
+  unsigned AddrSpace = ST->getAddressSpace();
+  if (AddrSpace == ARM64AS::PTR64 || AddrSpace == ARM64AS::PTR32_SPTR ||
+      AddrSpace == ARM64AS::PTR32_UPTR) {
+    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+    if (PtrVT != Ptr.getSimpleValueType()) {
+      SDValue Cast = DAG.getAddrSpaceCast(DL, PtrVT, Ptr, AddrSpace, 0);
+      return DAG.getStore(Chain, DL, Value, Cast, ST->getPointerInfo(),
+                          ST->getOriginalAlign(),
+                          ST->getMemOperand()->getFlags(), ST->getAAInfo());
+    }
+  }
+
   if (SDValue Res = combineI8TruncStore(ST, DAG, Subtarget))
     return Res;
 
@@ -23849,8 +23919,8 @@ static SDValue performSTORECombine(SDNode *N,
       ValueVT.isFixedLengthVector() &&
       ValueVT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits() &&
       hasValidElementTypeForFPTruncStore(Value.getOperand(0).getValueType()))
-    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
-                             ST->getMemoryVT(), ST->getMemOperand());
+    return DAG.getTruncStore(Chain, DL, Value.getOperand(0), Ptr, MemVT,
+                             ST->getMemOperand());
 
   if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
     return Split;
@@ -27391,6 +27461,11 @@ void AArch64TargetLowering::ReplaceNodeResults(
     ReplaceATOMIC_LOAD_128Results(N, Results, DAG, Subtarget);
     return;
   }
+  case ISD::ADDRSPACECAST: {
+    SDValue V = LowerADDRSPACECAST(SDValue(N, 0), DAG);
+    Results.push_back(V);
+    return;
+  }
   case ISD::ATOMIC_LOAD:
   case ISD::LOAD: {
     MemSDNode *LoadNode = cast<MemSDNode>(N);

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 17 additions & 5 deletions
@@ -563,6 +563,10 @@ const unsigned StackProbeMaxLoopUnroll = 4;
 
 } // namespace AArch64
 
+namespace ARM64AS {
+enum : unsigned { PTR32_SPTR = 270, PTR32_UPTR = 271, PTR64 = 272 };
+}
+
 class AArch64Subtarget;
 
 class AArch64TargetLowering : public TargetLowering {
@@ -594,11 +598,19 @@ class AArch64TargetLowering : public TargetLowering {
                      unsigned Depth) const override;
 
   MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
-    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
-    // *DAG* representation of pointers will always be 64-bits. They will be
-    // truncated and extended when transferred to memory, but the 64-bit DAG
-    // allows us to use AArch64's addressing modes much more easily.
-    return MVT::getIntegerVT(64);
+    if ((AS == ARM64AS::PTR32_SPTR) || (AS == ARM64AS::PTR32_UPTR)) {
+      // These are 32-bit pointers created using the `__ptr32` extension or
+      // similar. They are handled by marking them as being in a different
+      // address space, and will be extended to 64-bits when used as the target
+      // of a load or store operation, or cast to a 64-bit pointer type.
+      return MVT::i32;
+    } else {
+      // Returning i64 unconditionally here (i.e. even for ILP32) means that the
+      // *DAG* representation of pointers will always be 64-bits. They will be
+      // truncated and extended when transferred to memory, but the 64-bit DAG
+      // allows us to use AArch64's addressing modes much more easily.
+      return MVT::i64;
+    }
   }
 
   bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
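The `ARM64AS` enum values line up with the pointer-width entries in the datalayout string used by the new test (`p270:32:32-p271:32:32-p272:64:64`). A small sketch of how those widths could be checked through the `DataLayout` API — the helper name is made up, and it assumes a module built with that datalayout:

#include "llvm/IR/DataLayout.h"
#include <cassert>

// Hypothetical helper: verify the pointer widths the MS qualifiers map to.
void checkPtrWidths(const llvm::DataLayout &DL) {
  assert(DL.getPointerSizeInBits(270) == 32); // __ptr32 __sptr
  assert(DL.getPointerSizeInBits(271) == 32); // __ptr32 __uptr
  assert(DL.getPointerSizeInBits(272) == 64); // __ptr64
  assert(DL.getPointerSizeInBits(0) == 64);   // default address space
}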

llvm/lib/Target/AArch64/AArch64TargetMachine.h

Lines changed: 1 addition & 2 deletions
@@ -68,8 +68,7 @@ class AArch64TargetMachine : public CodeGenTargetMachineImpl {
 
   /// Returns true if a cast between SrcAS and DestAS is a noop.
   bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
-    // Addrspacecasts are always noops.
-    return true;
+    return getPointerSize(SrcAS) == getPointerSize(DestAS);
  }
 
 private:
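With this change, only casts that actually change pointer width count as real operations. A hedged model of the new predicate, using the widths from the ARM64AS mapping (270/271 are 32-bit, 0/272 are 64-bit; the real override compares `getPointerSize`, which reports bytes, but the comparison is the same):

#include <cassert>

// Illustrative stand-in for AArch64TargetMachine::isNoopAddrSpaceCast.
unsigned pointerSizeBits(unsigned AS) {
  return (AS == 270 || AS == 271) ? 32 : 64;
}
bool isNoopCast(unsigned SrcAS, unsigned DestAS) {
  return pointerSizeBits(SrcAS) == pointerSizeBits(DestAS);
}

int main() {
  assert(isNoopCast(0, 272));   // 64 -> 64 (__ptr64): no-op
  assert(isNoopCast(270, 271)); // 32 -> 32: no-op width-wise
  assert(!isNoopCast(0, 270));  // 64 -> 32 (__sptr): real truncate
  assert(!isNoopCast(271, 0));  // 32 -> 64 (__uptr): real zero extend
}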

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 1 addition & 2 deletions
@@ -2999,9 +2999,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
     LLT PtrTy = MRI.getType(LdSt.getPointerReg());
 
+    // Can only handle AddressSpace 0, 64-bit pointers.
     if (PtrTy != LLT::pointer(0, 64)) {
-      LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
-                        << ", expected: " << LLT::pointer(0, 64) << '\n');
       return false;
     }
 

Lines changed: 182 additions & 0 deletions
@@ -0,0 +1,182 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+; RUN: llc --fast-isel < %s | FileCheck %s
+; RUN: llc --global-isel --global-isel-abort=2 < %s | FileCheck %s
+
+; Source to regenerate:
+; struct Foo {
+;   int * __ptr32 p32;
+;   int * __ptr64 p64;
+;   __attribute__((address_space(9))) int *p_other;
+; };
+; extern "C" void use_foo(Foo *f);
+; extern "C" int use_int(int i);
+; extern "C" void test_sign_ext(Foo *f, int * __ptr32 __sptr i) {
+;   f->p64 = i;
+;   use_foo(f);
+; }
+; extern "C" void test_sign_ext_store_load(int * __ptr32 __sptr i) {
+;   *i = use_int(*i);
+; }
+; extern "C" void test_zero_ext(Foo *f, int * __ptr32 __uptr i) {
+;   f->p64 = i;
+;   use_foo(f);
+; }
+; extern "C" void test_zero_ext_store_load(int * __ptr32 __uptr i) {
+;   *i = use_int(*i);
+; }
+; extern "C" void test_trunc(Foo *f, int * __ptr64 i) {
+;   f->p32 = i;
+;   use_foo(f);
+; }
+; extern "C" void test_noop1(Foo *f, int * __ptr32 i) {
+;   f->p32 = i;
+;   use_foo(f);
+; }
+; extern "C" void test_noop2(Foo *f, int * __ptr64 i) {
+;   f->p64 = i;
+;   use_foo(f);
+; }
+; extern "C" void test_null_arg(Foo *f, int * __ptr32 i) {
+;   test_noop1(f, 0);
+; }
+; extern "C" void test_unrecognized(Foo *f, __attribute__((address_space(14))) int *i) {
+;   f->p32 = (int * __ptr32)i;
+;   use_foo(f);
+; }
+;
+; $ clang --target=aarch64-windows-msvc -fms-extensions -O2 -S -emit-llvm t.cpp
+
+target datalayout = "e-m:w-p:64:64-i32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-windows-msvc"
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_sign_ext(ptr noundef %f, ptr addrspace(270) noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_sign_ext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    sxtw x8, w1
+; CHECK-NEXT:    str x8, [x0, #8]
+; CHECK-NEXT:    b use_foo
+entry:
+  %0 = addrspacecast ptr addrspace(270) %i to ptr
+  %p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
+  store ptr %0, ptr %p64, align 8
+  tail call void @use_foo(ptr noundef %f)
+  ret void
+}
+
+declare dso_local void @use_foo(ptr noundef) local_unnamed_addr #1
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_sign_ext_store_load(ptr addrspace(270) nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_sign_ext_store_load:
+; CHECK:       // %bb.0: // %entry
+; CHECK:         sxtw x19, w0
+; CHECK-NEXT:    ldr w0, [x19]
+; CHECK-NEXT:    bl use_int
+; CHECK-NEXT:    str w0, [x19]
+entry:
+  %0 = load i32, ptr addrspace(270) %i, align 4
+  %call = tail call i32 @use_int(i32 noundef %0)
+  store i32 %call, ptr addrspace(270) %i, align 4
+  ret void
+}
+
+declare dso_local i32 @use_int(i32 noundef) local_unnamed_addr #1
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_zero_ext(ptr noundef %f, ptr addrspace(271) noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_zero_ext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, w1
+; CHECK-NEXT:    str x8, [x0, #8]
+; CHECK-NEXT:    b use_foo
+entry:
+  %0 = addrspacecast ptr addrspace(271) %i to ptr
+  %p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
+  store ptr %0, ptr %p64, align 8
+  tail call void @use_foo(ptr noundef %f)
+  ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_zero_ext_store_load(ptr addrspace(271) nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_zero_ext_store_load:
+; CHECK:       // %bb.0: // %entry
+; CHECK:         mov w19, w0
+; CHECK-NEXT:    ldr w0, [x19]
+; CHECK-NEXT:    bl use_int
+; CHECK-NEXT:    str w0, [x19]
entry:
+  %0 = load i32, ptr addrspace(271) %i, align 4
+  %call = tail call i32 @use_int(i32 noundef %0)
+  store i32 %call, ptr addrspace(271) %i, align 4
+  ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_trunc(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_trunc:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str w1, [x0]
+; CHECK-NEXT:    b use_foo
+entry:
+  %0 = addrspacecast ptr %i to ptr addrspace(270)
+  store ptr addrspace(270) %0, ptr %f, align 8
+  tail call void @use_foo(ptr noundef nonnull %f)
+  ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_noop1(ptr noundef %f, ptr addrspace(270) noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_noop1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str w1, [x0]
+; CHECK-NEXT:    b use_foo
+entry:
+  store ptr addrspace(270) %i, ptr %f, align 8
+  tail call void @use_foo(ptr noundef nonnull %f)
+  ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_noop2(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_noop2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x1, [x0, #8]
+; CHECK-NEXT:    b use_foo
+entry:
+  %p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
+  store ptr %i, ptr %p64, align 8
+  tail call void @use_foo(ptr noundef %f)
+  ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_null_arg(ptr noundef %f, ptr addrspace(270) nocapture noundef readnone %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_null_arg:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str wzr, [x0]
+; CHECK-NEXT:    b use_foo
+entry:
+  store ptr addrspace(270) null, ptr %f, align 8
+  tail call void @use_foo(ptr noundef nonnull %f)
+  ret void
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @test_unrecognized(ptr noundef %f, ptr addrspace(14) noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_unrecognized:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str w1, [x0]
+; CHECK-NEXT:    b use_foo
+entry:
+  %0 = addrspacecast ptr addrspace(14) %i to ptr addrspace(270)
+  store ptr addrspace(270) %0, ptr %f, align 8
+  tail call void @use_foo(ptr noundef nonnull %f)
+  ret void
+}
+
+attributes #0 = { mustprogress uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }

llvm/test/CodeGen/X86/mixed-ptr-sizes.ll

Lines changed: 2 additions & 0 deletions
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s | FileCheck %s --check-prefixes=ALL,CHECK
 ; RUN: llc -O0 < %s | FileCheck %s --check-prefixes=ALL,CHECK-O0
+; RUN: llc --fast-isel < %s | FileCheck %s --check-prefixes=ALL,CHECK
+; RUN: llc --global-isel --global-isel-abort=2 < %s | FileCheck %s --check-prefixes=ALL,CHECK
 
 ; Source to regenerate:
 ; struct Foo {
