[llvm][aarch64] Add support for the MS qualifiers __ptr32, __ptr64, __sptr, __uptr #112793

Merged · 1 commit · Jan 31, 2025
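For context, the MSVC qualifiers handled by this patch mark individual pointers as 32-bit or 64-bit, with __sptr/__uptr choosing signed or unsigned widening. The snippet below is an illustrative reconstruction in the spirit of the "Source to regenerate" block in the new test (compiled with clang --target=aarch64-windows-msvc -fms-extensions); it is not part of the patch, and the function names are made up for the example.

// Illustrative only: MSVC mixed pointer-size qualifiers on AArch64.
// __ptr32/__ptr64 select the pointer width; __sptr/__uptr pick sign- or
// zero-extension when a 32-bit pointer is widened to 64 bits.
struct Foo {
  int * __ptr32 p32;   // 32-bit pointer (address space 270 or 271 in the IR below)
  int * __ptr64 p64;   // 64-bit pointer (address space 272 in the IR below)
};

extern "C" void use_foo(Foo *f);

extern "C" void store_signed(Foo *f, int * __ptr32 __sptr i) {
  f->p64 = i;          // widened with sign extension (sxtw in the test CHECKs)
  use_foo(f);
}

extern "C" void store_unsigned(Foo *f, int * __ptr32 __uptr i) {
  f->p64 = i;          // widened with zero extension (mov w-reg in the test CHECKs)
  use_foo(f);
}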
83 changes: 79 additions & 4 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -530,6 +530,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::XOR, MVT::i32, Custom);
setOperationAction(ISD::XOR, MVT::i64, Custom);

setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);

// Virtually no operation on f128 is legal, but LLVM can't expand them when
// there's a valid register class, so we need custom operations in most cases.
setOperationAction(ISD::FABS, MVT::f128, Expand);
@@ -6880,6 +6883,37 @@ static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
ST->getBasePtr(), ST->getMemOperand());
}

static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
SDValue Src = Op.getOperand(0);
MVT DestVT = Op.getSimpleValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());

unsigned SrcAS = N->getSrcAddressSpace();
unsigned DestAS = N->getDestAddressSpace();
assert(SrcAS != DestAS &&
"addrspacecast must be between different address spaces");
assert(TLI.getTargetMachine().getPointerSize(SrcAS) !=
TLI.getTargetMachine().getPointerSize(DestAS) &&
"addrspacecast must be between different ptr sizes");

if (SrcAS == ARM64AS::PTR32_SPTR) {
return DAG.getNode(ISD::SIGN_EXTEND, dl, DestVT, Src,
DAG.getTargetConstant(0, dl, DestVT));
} else if (SrcAS == ARM64AS::PTR32_UPTR) {
return DAG.getNode(ISD::ZERO_EXTEND, dl, DestVT, Src,
DAG.getTargetConstant(0, dl, DestVT));
} else if ((DestAS == ARM64AS::PTR32_SPTR) ||
(DestAS == ARM64AS::PTR32_UPTR)) {
SDValue Ext = DAG.getAnyExtOrTrunc(Src, dl, DestVT);
SDValue Trunc = DAG.getZeroExtendInReg(Ext, dl, DestVT);
return Trunc;
} else {
return Src;
}
}

// Custom lowering for any store, vector or scalar and/or default or with
// a truncate operations. Currently only custom lower truncate operation
// from vector v4i16 to v4i8 or volatile stores of i128.
@@ -7541,6 +7575,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
case ISD::ADDRSPACECAST:
return LowerADDRSPACECAST(Op, DAG);
case ISD::SIGN_EXTEND_INREG: {
// Only custom lower when ExtraVT has a legal byte based element type.
EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -23555,6 +23591,26 @@ static SDValue performLOADCombine(SDNode *N,
performTBISimplification(N->getOperand(1), DCI, DAG);

LoadSDNode *LD = cast<LoadSDNode>(N);
EVT RegVT = LD->getValueType(0);
EVT MemVT = LD->getMemoryVT();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc DL(LD);

// Cast ptr32 and ptr64 pointers to the default address space before a load.
unsigned AddrSpace = LD->getAddressSpace();
if (AddrSpace == ARM64AS::PTR64 || AddrSpace == ARM64AS::PTR32_SPTR ||
AddrSpace == ARM64AS::PTR32_UPTR) {
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
if (PtrVT != LD->getBasePtr().getSimpleValueType()) {
SDValue Cast =
DAG.getAddrSpaceCast(DL, PtrVT, LD->getBasePtr(), AddrSpace, 0);
return DAG.getExtLoad(LD->getExtensionType(), DL, RegVT, LD->getChain(),
Cast, LD->getPointerInfo(), MemVT,
LD->getOriginalAlign(),
LD->getMemOperand()->getFlags());
}
}

if (LD->isVolatile() || !Subtarget->isLittleEndian())
return SDValue(N, 0);

@@ -23564,13 +23620,11 @@
if (!LD->isNonTemporal())
return SDValue(N, 0);

EVT MemVT = LD->getMemoryVT();
if (MemVT.isScalableVector() || MemVT.getSizeInBits() <= 256 ||
MemVT.getSizeInBits() % 256 == 0 ||
256 % MemVT.getScalarSizeInBits() != 0)
return SDValue(N, 0);

SDLoc DL(LD);
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
SDNodeFlags Flags = LD->getFlags();
@@ -23830,12 +23884,28 @@ static SDValue performSTORECombine(SDNode *N,
SDValue Value = ST->getValue();
SDValue Ptr = ST->getBasePtr();
EVT ValueVT = Value.getValueType();
EVT MemVT = ST->getMemoryVT();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc DL(ST);

auto hasValidElementTypeForFPTruncStore = [](EVT VT) {
EVT EltVT = VT.getVectorElementType();
return EltVT == MVT::f32 || EltVT == MVT::f64;
};

// Cast ptr32 and ptr64 pointers to the default address space before a store.
unsigned AddrSpace = ST->getAddressSpace();
if (AddrSpace == ARM64AS::PTR64 || AddrSpace == ARM64AS::PTR32_SPTR ||
AddrSpace == ARM64AS::PTR32_UPTR) {
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
if (PtrVT != Ptr.getSimpleValueType()) {
SDValue Cast = DAG.getAddrSpaceCast(DL, PtrVT, Ptr, AddrSpace, 0);
return DAG.getStore(Chain, DL, Value, Cast, ST->getPointerInfo(),
ST->getOriginalAlign(),
ST->getMemOperand()->getFlags(), ST->getAAInfo());
}
}

if (SDValue Res = combineI8TruncStore(ST, DAG, Subtarget))
return Res;

@@ -23849,8 +23919,8 @@
ValueVT.isFixedLengthVector() &&
ValueVT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits() &&
hasValidElementTypeForFPTruncStore(Value.getOperand(0).getValueType()))
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
ST->getMemoryVT(), ST->getMemOperand());
return DAG.getTruncStore(Chain, DL, Value.getOperand(0), Ptr, MemVT,
ST->getMemOperand());

if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
return Split;
@@ -27391,6 +27461,11 @@ void AArch64TargetLowering::ReplaceNodeResults(
ReplaceATOMIC_LOAD_128Results(N, Results, DAG, Subtarget);
return;
}
case ISD::ADDRSPACECAST: {
SDValue V = LowerADDRSPACECAST(SDValue(N, 0), DAG);
Results.push_back(V);
return;
}
case ISD::ATOMIC_LOAD:
case ISD::LOAD: {
MemSDNode *LoadNode = cast<MemSDNode>(N);
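As a side note on the LowerADDRSPACECAST hunk above: casts out of PTR32_SPTR widen with SIGN_EXTEND, casts out of PTR32_UPTR widen with ZERO_EXTEND, and casts into either 32-bit space truncate. The standalone sketch below (not part of the patch; plain C++ used purely for illustration) models that widening on raw 32-bit values:

// Minimal sketch, assuming the PTR32_SPTR/PTR32_UPTR semantics implemented above.
#include <cassert>
#include <cstdint>

uint64_t widen_sptr(uint32_t p) {        // __ptr32 __sptr -> 64-bit
  return (uint64_t)(int64_t)(int32_t)p;  // SIGN_EXTEND
}

uint64_t widen_uptr(uint32_t p) {        // __ptr32 __uptr -> 64-bit
  return (uint64_t)p;                    // ZERO_EXTEND
}

int main() {
  assert(widen_sptr(0x80000000u) == 0xFFFFFFFF80000000ull);
  assert(widen_uptr(0x80000000u) == 0x0000000080000000ull);
}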
22 changes: 17 additions & 5 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -563,6 +563,10 @@ const unsigned StackProbeMaxLoopUnroll = 4;

} // namespace AArch64

namespace ARM64AS {
enum : unsigned { PTR32_SPTR = 270, PTR32_UPTR = 271, PTR64 = 272 };
}

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
@@ -594,11 +598,19 @@ class AArch64TargetLowering : public TargetLowering {
unsigned Depth) const override;

MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
// Returning i64 unconditionally here (i.e. even for ILP32) means that the
// *DAG* representation of pointers will always be 64-bits. They will be
// truncated and extended when transferred to memory, but the 64-bit DAG
// allows us to use AArch64's addressing modes much more easily.
return MVT::getIntegerVT(64);
if ((AS == ARM64AS::PTR32_SPTR) || (AS == ARM64AS::PTR32_UPTR)) {
// These are 32-bit pointers created using the `__ptr32` extension or
// similar. They are handled by marking them as being in a different
// address space, and will be extended to 64-bits when used as the target
// of a load or store operation, or cast to a 64-bit pointer type.
return MVT::i32;

Review comment (Collaborator): This probably deserves a more complete comment... specifically noting that these pointers are 32-bit, but they'll be converted to 64-bit pointers before isel.

Reply (Contributor Author): Good idea

} else {
// Returning i64 unconditionally here (i.e. even for ILP32) means that the
// *DAG* representation of pointers will always be 64-bits. They will be
// truncated and extended when transferred to memory, but the 64-bit DAG
// allows us to use AArch64's addressing modes much more easily.
return MVT::i64;
}
}

bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
3 changes: 1 addition & 2 deletions llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -68,8 +68,7 @@ class AArch64TargetMachine : public CodeGenTargetMachineImpl {

/// Returns true if a cast between SrcAS and DestAS is a noop.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
// Addrspacecasts are always noops.
return true;
return getPointerSize(SrcAS) == getPointerSize(DestAS);
}

private:
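A quick model of the new isNoopAddrSpaceCast behaviour (an assumption-laden sketch, not how the backend actually queries pointer sizes): using the widths from the data layout in the new test, p270/p271 are 32-bit and p272 is 64-bit, so only casts between same-width address spaces remain no-ops.

// Rough stand-in for TargetMachine::getPointerSize(); illustrative only.
#include <cassert>

static unsigned pointerSizeForAS(unsigned AS) {
  return (AS == 270 || AS == 271) ? 4 : 8;   // bytes
}

static bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) {
  return pointerSizeForAS(SrcAS) == pointerSizeForAS(DestAS);
}

int main() {
  assert(isNoopAddrSpaceCast(272, 0));   // 64-bit <-> 64-bit: still a no-op
  assert(!isNoopAddrSpaceCast(270, 0));  // 32-bit <-> 64-bit: needs real lowering
}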
3 changes: 1 addition & 2 deletions llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2999,9 +2999,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
LLT PtrTy = MRI.getType(LdSt.getPointerReg());

// Can only handle AddressSpace 0, 64-bit pointers.
if (PtrTy != LLT::pointer(0, 64)) {
LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
<< ", expected: " << LLT::pointer(0, 64) << '\n');
return false;
}

182 changes: 182 additions & 0 deletions llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll
@@ -0,0 +1,182 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s
; RUN: llc --fast-isel < %s | FileCheck %s
; RUN: llc --global-isel --global-isel-abort=2 < %s | FileCheck %s

; Source to regenerate:
; struct Foo {
; int * __ptr32 p32;
; int * __ptr64 p64;
; __attribute__((address_space(9))) int *p_other;
; };
; extern "C" void use_foo(Foo *f);
; extern "C" int use_int(int i);
; extern "C" void test_sign_ext(Foo *f, int * __ptr32 __sptr i) {
; f->p64 = i;
; use_foo(f);
; }
; extern "C" void test_sign_ext_store_load(int * __ptr32 __sptr i) {
; *i = use_int(*i);
; }
; extern "C" void test_zero_ext(Foo *f, int * __ptr32 __uptr i) {
; f->p64 = i;
; use_foo(f);
; }
; extern "C" void test_zero_ext_store_load(int * __ptr32 __uptr i) {
; *i = use_int(*i);
; }
; extern "C" void test_trunc(Foo *f, int * __ptr64 i) {
; f->p32 = i;
; use_foo(f);
; }
; extern "C" void test_noop1(Foo *f, int * __ptr32 i) {
; f->p32 = i;
; use_foo(f);
; }
; extern "C" void test_noop2(Foo *f, int * __ptr64 i) {
; f->p64 = i;
; use_foo(f);
; }
; extern "C" void test_null_arg(Foo *f, int * __ptr32 i) {
; test_noop1(f, 0);
; }
; extern "C" void test_unrecognized(Foo *f, __attribute__((address_space(14))) int *i) {
; f->p32 = (int * __ptr32)i;
; use_foo(f);
; }
;
; $ clang --target=aarch64-windows-msvc -fms-extensions -O2 -S -emit-llvm t.cpp

target datalayout = "e-m:w-p:64:64-i32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-windows-msvc"

; Function Attrs: mustprogress uwtable
define dso_local void @test_sign_ext(ptr noundef %f, ptr addrspace(270) noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_sign_ext:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sxtw x8, w1
; CHECK-NEXT: str x8, [x0, #8]
; CHECK-NEXT: b use_foo
entry:
%0 = addrspacecast ptr addrspace(270) %i to ptr
%p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
store ptr %0, ptr %p64, align 8
tail call void @use_foo(ptr noundef %f)
ret void
}

declare dso_local void @use_foo(ptr noundef) local_unnamed_addr #1

; Function Attrs: mustprogress uwtable
define dso_local void @test_sign_ext_store_load(ptr addrspace(270) nocapture noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_sign_ext_store_load:
; CHECK: // %bb.0: // %entry
; CHECK: sxtw x19, w0
; CHECK-NEXT: ldr w0, [x19]
; CHECK-NEXT: bl use_int
; CHECK-NEXT: str w0, [x19]
entry:
%0 = load i32, ptr addrspace(270) %i, align 4
%call = tail call i32 @use_int(i32 noundef %0)
store i32 %call, ptr addrspace(270) %i, align 4
ret void
}

declare dso_local i32 @use_int(i32 noundef) local_unnamed_addr #1

; Function Attrs: mustprogress uwtable
define dso_local void @test_zero_ext(ptr noundef %f, ptr addrspace(271) noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_zero_ext:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: str x8, [x0, #8]
; CHECK-NEXT: b use_foo
entry:
%0 = addrspacecast ptr addrspace(271) %i to ptr
%p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
store ptr %0, ptr %p64, align 8
tail call void @use_foo(ptr noundef %f)
ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_zero_ext_store_load(ptr addrspace(271) nocapture noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_zero_ext_store_load:
; CHECK: // %bb.0: // %entry
; CHECK: mov w19, w0
; CHECK-NEXT: ldr w0, [x19]
; CHECK-NEXT: bl use_int
; CHECK-NEXT: str w0, [x19]
entry:
%0 = load i32, ptr addrspace(271) %i, align 4
%call = tail call i32 @use_int(i32 noundef %0)
store i32 %call, ptr addrspace(271) %i, align 4
ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_trunc(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_trunc:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str w1, [x0]
; CHECK-NEXT: b use_foo
entry:
%0 = addrspacecast ptr %i to ptr addrspace(270)
store ptr addrspace(270) %0, ptr %f, align 8
tail call void @use_foo(ptr noundef nonnull %f)
ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_noop1(ptr noundef %f, ptr addrspace(270) noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_noop1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str w1, [x0]
; CHECK-NEXT: b use_foo
entry:
store ptr addrspace(270) %i, ptr %f, align 8
tail call void @use_foo(ptr noundef nonnull %f)
ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_noop2(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_noop2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x1, [x0, #8]
; CHECK-NEXT: b use_foo
entry:
%p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
store ptr %i, ptr %p64, align 8
tail call void @use_foo(ptr noundef %f)
ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_null_arg(ptr noundef %f, ptr addrspace(270) nocapture noundef readnone %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_null_arg:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str wzr, [x0]
; CHECK-NEXT: b use_foo
entry:
store ptr addrspace(270) null, ptr %f, align 8
tail call void @use_foo(ptr noundef nonnull %f)
ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_unrecognized(ptr noundef %f, ptr addrspace(14) noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_unrecognized:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str w1, [x0]
; CHECK-NEXT: b use_foo
entry:
%0 = addrspacecast ptr addrspace(14) %i to ptr addrspace(270)
store ptr addrspace(270) %0, ptr %f, align 8
tail call void @use_foo(ptr noundef nonnull %f)
ret void
}

attributes #0 = { mustprogress uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }
attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/X86/mixed-ptr-sizes.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s --check-prefixes=ALL,CHECK
; RUN: llc -O0 < %s | FileCheck %s --check-prefixes=ALL,CHECK-O0
; RUN: llc --fast-isel < %s | FileCheck %s --check-prefixes=ALL,CHECK
; RUN: llc --global-isel --global-isel-abort=2 < %s | FileCheck %s --check-prefixes=ALL,CHECK

; Source to regenerate:
; struct Foo {