@@ -189,7 +189,60 @@ define void @foo18(float ** noalias readonly %from, float ** %to) {
189
189
ret void
190
190
}
191
191
192
- !nvvm.annotations = !{!1 ,!2 ,!3 ,!4 ,!5 ,!6 , !7 ,!8 ,!9 ,!10 ,!11 ,!12 , !13 , !14 , !15 , !16 , !17 , !18 }
192
+ ; Test that we can infer a cached load for a pointer induction variable.
193
+ ; SM20-LABEL: .visible .entry foo19(
194
+ ; SM20: ld.global.f32
195
+ ; SM35-LABEL: .visible .entry foo19(
196
+ ; SM35: ld.global.nc.f32
197
+ define void @foo19 (float * noalias readonly %from , float * %to , i32 %n ) { ; accumulates floats loaded through %from and stores the total to %to
198
+ entry:
199
+ br label %loop
200
+
201
+ loop:
202
+ %i = phi i32 [ 0 , %entry ], [ %nexti , %loop ] ; element index: 0 on entry, %nexti on the back edge
203
+ %sum = phi float [ 0.0 , %entry ], [ %nextsum , %loop ] ; running total: 0.0 on entry, %nextsum on the back edge
204
+ %ptr = getelementptr inbounds float , float * %from , i32 %i ; address of element %i relative to %from
205
+ %value = load float , float * %ptr , align 4 ; the load whose lowering the checks above inspect
206
+ %nextsum = fadd float %value , %sum
207
+ %nexti = add nsw i32 %i , 1
208
+ %exitcond = icmp eq i32 %nexti , %n ; leave the loop once the incremented index equals %n
209
+ br i1 %exitcond , label %exit , label %loop
210
+
211
+ exit:
212
+ store float %nextsum , float * %to ; the only store in this function goes through %to, never through %from
213
+ ret void
214
+ }
215
+
216
+ ; This test captures the case of a non-kernel function. In a
217
+ ; non-kernel function, without interprocedural analysis, we do not
218
+ ; know that the parameter is global. We also do not know that the
219
+ ; pointed-to memory is never written to (for the duration of the
220
+ ; kernel). For both reasons, we cannot use a cached load here.
221
+ ; SM20-LABEL: notkernel(
222
+ ; SM20: ld.f32
223
+ ; SM35-LABEL: notkernel(
224
+ ; SM35: ld.f32
225
+ define void @notkernel (float * noalias readonly %from , float * %to ) { ; copies one float from %from to %to; no !nvvm.annotations node visible here tags this function as a kernel
226
+ %1 = load float , float * %from ; pointer carries no addrspace qualifier; the checks above expect a plain ld.f32
227
+ store float %1 , float * %to
228
+ ret void
229
+ }
230
+
231
+ ; As @notkernel, but with the parameter explicitly marked as global. We still
232
+ ; do not know that the parameter is never written to (for the duration of the
233
+ ; kernel). This case does not currently come up normally since we do not infer
234
+ ; that pointers are global interprocedurally as of 2015-08-05.
235
+ ; SM20-LABEL: notkernel2(
236
+ ; SM20: ld.global.f32
237
+ ; SM35-LABEL: notkernel2(
238
+ ; SM35: ld.global.f32
239
+ define void @notkernel2 (float addrspace (1 ) * noalias readonly %from , float * %to ) { ; same copy as @notkernel, but %from is declared in addrspace(1)
240
+ %1 = load float , float addrspace (1 ) * %from ; the checks above expect ld.global.f32 without the .nc qualifier
241
+ store float %1 , float * %to
242
+ ret void
243
+ }
244
+
245
+ !nvvm.annotations = !{!1 ,!2 ,!3 ,!4 ,!5 ,!6 , !7 ,!8 ,!9 ,!10 ,!11 ,!12 , !13 , !14 , !15 , !16 , !17 , !18 , !19 } ; annotation list; the nodes visible in this file each tag one function with "kernel"
193
246
!1 = !{void (float *, float *)* @foo1 , !"kernel" , i32 1 }
194
247
!2 = !{void (double *, double *)* @foo2 , !"kernel" , i32 1 }
195
248
!3 = !{void (i16 *, i16 *)* @foo3 , !"kernel" , i32 1 }
@@ -208,3 +261,4 @@ define void @foo18(float ** noalias readonly %from, float ** %to) {
208
261
!16 = !{void (<4 x float > *, <4 x float > *)* @foo16 , !"kernel" , i32 1 }
209
262
!17 = !{void (<4 x double > *, <4 x double > *)* @foo17 , !"kernel" , i32 1 }
210
263
!18 = !{void (float **, float **)* @foo18 , !"kernel" , i32 1 }
264
+ !19 = !{void (float *, float *, i32 )* @foo19 , !"kernel" , i32 1 } ; tags @foo19 with the "kernel" annotation, value 1
0 commit comments