Commit 65e44b4 — [LV] Add tests with deref assumptions and non-constant sizes.
Parent: 34387fc. 1 file changed: +339 −0 lines (new test file, 339 additions).
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt -p loop-vectorize -force-vector-width=2 -S %s | FileCheck %s

; Intrinsic used to attach "align"/"dereferenceable" operand bundles to %a.
declare void @llvm.assume(i1)
; %a is known dereferenceable via assume for the whole loop.
; The i8 accesses cover exactly [%a, %a + %n), matching the assumed
; "dereferenceable"(%a, %n) bundle, so the conditional load from %a can be
; executed unconditionally (widened without predication) by the vectorizer.
define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync {
; CHECK-LABEL: define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[N]]) ]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i8>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i8> [[WIDE_LOAD]], <2 x i8> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <2 x i8> [[PREDPHI]], ptr [[TMP7]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[L_B:%.*]] = load i8, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[C_1:%.*]] = icmp sge i8 [[L_B]], 0
; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
; CHECK: [[LOOP_THEN]]:
; CHECK-NEXT: [[L_A:%.*]] = load i8, ptr [[GEP_A]], align 1
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[MERGE:%.*]] = phi i8 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[IV]]
; CHECK-NEXT: store i8 [[MERGE]], ptr [[GEP_C]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 %n) ]
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %gep.a = getelementptr inbounds i8, ptr %a, i64 %iv
  %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv
  %l.b = load i8, ptr %gep.b, align 1
  %c.1 = icmp sge i8 %l.b, 0
  br i1 %c.1, label %loop.latch, label %loop.then

loop.then:
  ; Conditional load from %a; legal to hoist/widen because of the assume.
  %l.a = load i8, ptr %gep.a, align 1
  br label %loop.latch

loop.latch:
  %merge = phi i8 [ %l.a, %loop.then ], [ %l.b, %loop.header ]
  %gep.c = getelementptr inbounds i8, ptr %c, i64 %iv
  store i8 %merge, ptr %gep.c, align 1
  %iv.next = add nuw nsw i64 %iv, 1
  %ec = icmp eq i64 %iv.next, %n
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}
89+
; %a is known dereferenceable via assume for the whole loop.
; The i32 accesses cover [%a, %a + 4*%n); the preheader computes
; %mul = %n * 4 and assumes "dereferenceable"(%a, %mul), so the size matches
; the accessed range and the conditional load can be widened unconditionally.
define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync {
; CHECK-LABEL: define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[N]], 4
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[MUL]]) ]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
; CHECK: [[LOOP_THEN]]:
; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  %mul = mul nsw nuw i64 %n, 4
  call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 %mul) ]
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
  %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
  %l.b = load i32, ptr %gep.b, align 1
  %c.1 = icmp sge i32 %l.b, 0
  br i1 %c.1, label %loop.latch, label %loop.then

loop.then:
  ; Conditional load from %a; safe to widen since %mul bytes are assumed deref.
  %l.a = load i32, ptr %gep.a, align 1
  br label %loop.latch

loop.latch:
  %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ]
  %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
  store i32 %merge, ptr %gep.c, align 1
  %iv.next = add nuw nsw i64 %iv, 1
  %ec = icmp eq i64 %iv.next, %n
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}
173+
174+
175+
; %a is NOT known dereferenceable via assume for the whole loop.
; The i32 accesses need 4*%n bytes but the assume only provides %n bytes, so
; the conditional load from %a must stay predicated (here: still emitted as an
; unpredicated wide load per current codegen — the CHECK lines pin that).
define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_access_i32(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync {
; CHECK-LABEL: define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_access_i32(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[N]]) ]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
; CHECK: [[LOOP_THEN]]:
; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  ; Deref size is %n bytes, but the loop reads %n i32s (4*%n bytes): too small.
  call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 %n) ]
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
  %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
  %l.b = load i32, ptr %gep.b, align 1
  %c.1 = icmp sge i32 %l.b, 0
  br i1 %c.1, label %loop.latch, label %loop.then

loop.then:
  %l.a = load i32, ptr %gep.a, align 1
  br label %loop.latch

loop.latch:
  %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ]
  %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
  store i32 %merge, ptr %gep.c, align 1
  %iv.next = add nuw nsw i64 %iv, 1
  %ec = icmp eq i64 %iv.next, %n
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}
257+
258+
; %a is NOT known dereferenceable via assume for the whole loop.
; Here the deref size is a constant 100 bytes while the trip count %n is
; unbounded, so the assumption cannot cover the full [%a, %a + 4*%n) range.
define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_access_i32(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync {
; CHECK-LABEL: define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_access_i32(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 100) ]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
; CHECK: [[LOOP_THEN]]:
; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  ; Constant 100-byte deref region; insufficient for an arbitrary %n i32s.
  call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 100) ]
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
  %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
  %l.b = load i32, ptr %gep.b, align 1
  %c.1 = icmp sge i32 %l.b, 0
  br i1 %c.1, label %loop.latch, label %loop.then

loop.then:
  %l.a = load i32, ptr %gep.a, align 1
  br label %loop.latch

loop.latch:
  %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ]
  %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
  store i32 %merge, ptr %gep.c, align 1
  %iv.next = add nuw nsw i64 %iv, 1
  %ec = icmp eq i64 %iv.next, %n
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}

0 commit comments.