Skip to content

Commit 4e01a07

Browse files
[AArch64] Fixes for Windows varargs handling. (#139972)
Omit spills when va_start is not present, like we do on other targets. In most situations this is just an optimization, but for thunks in arm64ec, we need this because it's illegal to reference varargs in a thunk. Fix the bug that prevented omitting vararg spills from working properly: we need to avoid interfering with musttail handling. (This is not a Windows-only issue, but it mostly affects Windows because musttail thunks are mostly used for the Microsoft C++ ABI.) On arm64ec, don't set x4 and x5 for musttail thunks; forward them from the caller. Fixes #139856.
1 parent 976b006 commit 4e01a07

File tree

6 files changed

+110
-76
lines changed

6 files changed

+110
-76
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8231,26 +8231,26 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
82318231
}
82328232

82338233
// varargs
8234-
// Note that IsWin64 part is required to prevent odd miscompilations on arm64
8235-
// windows platforms. For more info refer to GH#126780 PR comments.
8236-
if (isVarArg &&
8237-
(DAG.getMachineFunction().getFrameInfo().hasVAStart() || IsWin64)) {
8238-
if (!Subtarget->isTargetDarwin() || IsWin64) {
8239-
// The AAPCS variadic function ABI is identical to the non-variadic
8240-
// one. As a result there may be more arguments in registers and we should
8241-
// save them for future reference.
8242-
// Win64 variadic functions also pass arguments in registers, but all float
8243-
// arguments are passed in integer registers.
8244-
saveVarArgRegisters(CCInfo, DAG, DL, Chain);
8245-
}
8246-
8247-
// This will point to the next argument passed via stack.
8248-
unsigned VarArgsOffset = CCInfo.getStackSize();
8249-
// We currently pass all varargs at 8-byte alignment, or 4 for ILP32
8250-
VarArgsOffset = alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8);
8251-
FuncInfo->setVarArgsStackOffset(VarArgsOffset);
8252-
FuncInfo->setVarArgsStackIndex(
8253-
MFI.CreateFixedObject(4, VarArgsOffset, true));
8234+
if (isVarArg) {
8235+
if (DAG.getMachineFunction().getFrameInfo().hasVAStart()) {
8236+
if (!Subtarget->isTargetDarwin() || IsWin64) {
8237+
// The AAPCS variadic function ABI is identical to the non-variadic
8238+
// one. As a result there may be more arguments in registers and we
8239+
// should save them for future reference.
8240+
// Win64 variadic functions also pass arguments in registers, but all
8241+
// float arguments are passed in integer registers.
8242+
saveVarArgRegisters(CCInfo, DAG, DL, Chain);
8243+
}
8244+
8245+
// This will point to the next argument passed via stack.
8246+
unsigned VarArgsOffset = CCInfo.getStackSize();
8247+
// We currently pass all varargs at 8-byte alignment, or 4 for ILP32
8248+
VarArgsOffset =
8249+
alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8);
8250+
FuncInfo->setVarArgsStackOffset(VarArgsOffset);
8251+
FuncInfo->setVarArgsStackIndex(
8252+
MFI.CreateFixedObject(4, VarArgsOffset, true));
8253+
}
82548254

82558255
if (MFI.hasMustTailInVarArgFunc()) {
82568256
SmallVector<MVT, 2> RegParmTypes;
@@ -9436,7 +9436,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
94369436
}
94379437
}
94389438

9439-
if (IsVarArg && Subtarget->isWindowsArm64EC()) {
9439+
if (IsVarArg && Subtarget->isWindowsArm64EC() &&
9440+
!(CLI.CB && CLI.CB->isMustTailCall())) {
94409441
SDValue ParamPtr = StackPtr;
94419442
if (IsTailCall) {
94429443
// Create a dummy object at the top of the stack that can be used to get

llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,15 @@ define void @has_varargs(...) hybrid_patchable nounwind {
2424
; CHECK-NEXT: .p2align 2
2525
; CHECK-NEXT: "#has_varargs$hp_target": // @"#has_varargs$hp_target"
2626
; CHECK-NEXT: // %bb.0:
27-
; CHECK-NEXT: sub sp, sp, #32
28-
; CHECK-NEXT: stp x0, x1, [x4, #-32]
29-
; CHECK-NEXT: stp x2, x3, [x4, #-16]
30-
; CHECK-NEXT: add sp, sp, #32
27+
; CHECK-NEXT: sub sp, sp, #48
28+
; CHECK-NEXT: stp x0, x1, [x4, #-32]!
29+
; CHECK-NEXT: stp x2, x3, [x4, #16]
30+
; CHECK-NEXT: str x4, [sp, #8]
31+
; CHECK-NEXT: add sp, sp, #48
3132
; CHECK-NEXT: ret
33+
%valist = alloca ptr
34+
call void @llvm.va_start(ptr %valist)
35+
call void @llvm.va_end(ptr %valist)
3236
ret void
3337
}
3438

llvm/test/CodeGen/AArch64/arm64ec-varargs.ll

Lines changed: 50 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -102,40 +102,58 @@ define void @varargs_many_argscalleer() nounwind {
102102

103103
define void @varargs_caller_tail() nounwind {
104104
; CHECK-LABEL: varargs_caller_tail:
105-
; CHECK: // %bb.0:
106-
; CHECK-NEXT: sub sp, sp, #48
107-
; CHECK-NEXT: mov x4, sp
108-
; CHECK-NEXT: add x8, sp, #16
109-
; CHECK-NEXT: mov x9, #4617315517961601024 // =0x4014000000000000
110-
; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
111-
; CHECK-NEXT: mov w1, #2 // =0x2
112-
; CHECK-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
113-
; CHECK-NEXT: mov w3, #4 // =0x4
114-
; CHECK-NEXT: mov w5, #16 // =0x10
115-
; CHECK-NEXT: stp xzr, x30, [sp, #24] // 8-byte Folded Spill
116-
; CHECK-NEXT: stp x9, x8, [sp]
117-
; CHECK-NEXT: str xzr, [sp, #16]
118-
; CHECK-NEXT: .weak_anti_dep varargs_callee
119-
; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
120-
; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
121-
; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
122-
; CHECK-NEXT: bl "#varargs_callee"
123-
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
124-
; CHECK-NEXT: add x4, sp, #48
125-
; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
126-
; CHECK-NEXT: mov w1, #4 // =0x4
127-
; CHECK-NEXT: mov w2, #3 // =0x3
128-
; CHECK-NEXT: mov w3, #2 // =0x2
129-
; CHECK-NEXT: mov x5, xzr
130-
; CHECK-NEXT: add sp, sp, #48
131-
; CHECK-NEXT: .weak_anti_dep varargs_callee
132-
; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
133-
; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
134-
; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
135-
; CHECK-NEXT: b "#varargs_callee"
105+
; CHECK: // %bb.0:
106+
; CHECK-NEXT: sub sp, sp, #48
107+
; CHECK-NEXT: mov x4, sp
108+
; CHECK-NEXT: add x8, sp, #16
109+
; CHECK-NEXT: mov x9, #4617315517961601024 // =0x4014000000000000
110+
; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
111+
; CHECK-NEXT: mov w1, #2 // =0x2
112+
; CHECK-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
113+
; CHECK-NEXT: mov w3, #4 // =0x4
114+
; CHECK-NEXT: mov w5, #16 // =0x10
115+
; CHECK-NEXT: stp xzr, x30, [sp, #24] // 8-byte Folded Spill
116+
; CHECK-NEXT: stp x9, x8, [sp]
117+
; CHECK-NEXT: str xzr, [sp, #16]
118+
; CHECK-NEXT: .weak_anti_dep varargs_callee
119+
; CHECK-NEXT: .set varargs_callee, "#varargs_callee"@WEAKREF
120+
; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
121+
; CHECK-NEXT: .set "#varargs_callee", varargs_callee@WEAKREF
122+
; CHECK-NEXT: bl "#varargs_callee"
123+
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
124+
; CHECK-NEXT: add x4, sp, #48
125+
; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
126+
; CHECK-NEXT: mov w1, #4 // =0x4
127+
; CHECK-NEXT: mov w2, #3 // =0x3
128+
; CHECK-NEXT: mov w3, #2 // =0x2
129+
; CHECK-NEXT: mov x5, xzr
130+
; CHECK-NEXT: add sp, sp, #48
131+
; CHECK-NEXT: .weak_anti_dep varargs_callee
132+
; CHECK-NEXT: .set varargs_callee, "#varargs_callee"@WEAKREF
133+
; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
134+
; CHECK-NEXT: .set "#varargs_callee", varargs_callee@WEAKREF
135+
; CHECK-NEXT: b "#varargs_callee"
136136
call void (double, ...) @varargs_callee(double 1.0, i32 2, double 3.0, i32 4, double 5.0, <2 x double> <double 0.0, double 0.0>)
137137
tail call void (double, ...) @varargs_callee(double 1.0, i32 4, i32 3, i32 2)
138138
ret void
139139
}
140140

141-
declare void @llvm.va_start(ptr)
141+
; Check we spill/restore x4 and x5, and don't dereference x4.
142+
define void @varargs_thunk(ptr noundef %0, ...) "thunk" {
143+
; CHECK-LABEL: varargs_thunk:
144+
; CHECK: // %bb.0:
145+
; CHECK-NEXT: ldr x11, [x0]
146+
; CHECK-NEXT: mov x9, x5
147+
; CHECK-NEXT: mov x10, x4
148+
; CHECK-NEXT: //APP
149+
; CHECK-NEXT: //NO_APP
150+
; CHECK-NEXT: ldr x11, [x11]
151+
; CHECK-NEXT: mov x4, x10
152+
; CHECK-NEXT: mov x5, x9
153+
; CHECK-NEXT: br x11
154+
call void asm "","~{x4},~{x5}"()
155+
%vtable = load ptr, ptr %0, align 8
156+
%vtablefn = load ptr, ptr %vtable, align 8
157+
musttail call void (ptr, ...) %vtablefn(ptr noundef %0, ...)
158+
ret void
159+
}

llvm/test/CodeGen/AArch64/darwinpcs-tail.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
; CHECK-LABEL: _tailTest:
99
; CHECK: b __ZN1C3addEPKcz
1010
; CHECK-LABEL: __ZThn8_N1C1fEiiiiiiiiiz:
11-
; CHECK: ldr w8, [sp, #4]
12-
; CHECK: str w8, [sp, #4]
11+
; CHECK: ldr w9, [sp, #4]
12+
; CHECK: str w9, [sp, #4]
1313
; CHECK: b __ZN1C1fEiiiiiiiiiz
1414

1515
%class.C = type { %class.A.base, [4 x i8], %class.B.base, [4 x i8] }

llvm/test/CodeGen/AArch64/vararg-tallcall.ll

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,16 @@ attributes #1 = { noinline optnone "thunk" }
3737
; CHECK: ldr x9, [x9]
3838
; CHECK: mov v0.16b, v16.16b
3939
; CHECK: br x9
40-
; CHECK-EC: mov v7.16b, v0.16b
41-
; CHECK-EC: ldr x9, [x0]
42-
; CHECK-EC: ldr x11, [x9]
43-
; CHECH-EC: add x4, sp, #96
44-
; CHECK-EC: mov v0.16b, v7.16b
45-
; CHECK-EC: add x4, sp, #96
46-
; CHECK-EC: ldr x30, [sp, #48]
47-
; CHECK-EC: add sp, sp, #96
48-
; CHECK-EC: br x11
40+
; CHECK-EC: mov v7.16b, v0.16b
41+
; CHECK-EC: ldr x9, [x0]
42+
; CHECK-EC: ldr x11, [x9]
43+
; CHECK-EC: blr x9
44+
; CHECK-EC-NEXT: mov v0.16b, v7.16b
45+
; CHECK-EC-NEXT: ldr q7, [sp]
46+
; CHECK-EC-NEXT: .seh_startepilogue
47+
; CHECK-EC-NEXT: ldr x30, [sp, #48]
48+
; CHECK-EC-NEXT: .seh_save_reg x30, 48
49+
; CHECK-EC-NEXT: add sp, sp, #96
50+
; CHECK-EC-NEXT: .seh_stackalloc 96
51+
; CHECK-EC-NEXT: .seh_endepilogue
52+
; CHECK-EC-NEXT: br x11

llvm/test/CodeGen/AArch64/win64_vararg2.ll

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=aarch64-pc-win32 | FileCheck %s
3-
; RUN: llc < %s -global-isel -mtriple=aarch64-pc-win32 | FileCheck %s --check-prefix=GISEL
3+
; RUN: llc < %s -global-isel -mtriple=aarch64-pc-win32 -global-isel-abort=0 | FileCheck %s --check-prefix=GISEL
44

55
; Function Attrs: mustprogress noinline nounwind optnone uwtable
66
define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
@@ -14,13 +14,15 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
1414
; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
1515
; CHECK-NEXT: .seh_save_reg x30, 24
1616
; CHECK-NEXT: .seh_endprologue
17+
; CHECK-NEXT: add x8, sp, #40
1718
; CHECK-NEXT: mov w19, w0
1819
; CHECK-NEXT: stp x3, x4, [sp, #40]
1920
; CHECK-NEXT: stp x5, x6, [sp, #56]
2021
; CHECK-NEXT: str x7, [sp, #72]
21-
; CHECK-NEXT: str w0, [sp, #12]
22-
; CHECK-NEXT: strb w1, [sp, #11]
23-
; CHECK-NEXT: strb w2, [sp, #10]
22+
; CHECK-NEXT: str x8, [sp, #8]
23+
; CHECK-NEXT: str w0, [sp, #4]
24+
; CHECK-NEXT: strb w1, [sp, #3]
25+
; CHECK-NEXT: strb w2, [sp, #2]
2426
; CHECK-NEXT: bl other
2527
; CHECK-NEXT: cmp w19, w0
2628
; CHECK-NEXT: cset w0, ls
@@ -46,13 +48,15 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
4648
; GISEL-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
4749
; GISEL-NEXT: .seh_save_reg x30, 24
4850
; GISEL-NEXT: .seh_endprologue
49-
; GISEL-NEXT: stp x3, x4, [sp, #40]
51+
; GISEL-NEXT: add x8, sp, #40
5052
; GISEL-NEXT: mov w19, w0
53+
; GISEL-NEXT: stp x3, x4, [sp, #40]
5154
; GISEL-NEXT: stp x5, x6, [sp, #56]
5255
; GISEL-NEXT: str x7, [sp, #72]
53-
; GISEL-NEXT: str w0, [sp, #12]
54-
; GISEL-NEXT: strb w1, [sp, #11]
55-
; GISEL-NEXT: strb w2, [sp, #10]
56+
; GISEL-NEXT: str x8, [sp, #8]
57+
; GISEL-NEXT: str w0, [sp, #4]
58+
; GISEL-NEXT: strb w1, [sp, #3]
59+
; GISEL-NEXT: strb w2, [sp, #2]
5660
; GISEL-NEXT: bl other
5761
; GISEL-NEXT: cmp w19, w0
5862
; GISEL-NEXT: cset w0, ls
@@ -67,6 +71,8 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
6771
; GISEL-NEXT: ret
6872
; GISEL-NEXT: .seh_endfunclet
6973
; GISEL-NEXT: .seh_endproc
74+
%valist = alloca ptr
75+
call void @llvm.va_start(ptr %valist)
7076
%a_alloc = alloca i32, align 4
7177
%b_alloc = alloca i8, align 1
7278
%c_alloc = alloca i8, align 1
@@ -76,6 +82,7 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
7682
%a_load = load i32, ptr %a_alloc, align 4
7783
%ret = call noundef i32 @other()
7884
%cmp = icmp ule i32 %a_load, %ret
85+
call void @llvm.va_end(ptr %valist)
7986
ret i1 %cmp
8087
}
8188

0 commit comments

Comments
 (0)