@@ -85,10 +85,8 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
85
85
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
86
86
; CHECK: region.guarded:
87
87
; CHECK-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
88
- ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
89
- ; CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
90
- ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
91
- ; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
88
+ ; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
89
+ ; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]]
92
90
; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
93
91
; CHECK: region.guarded.end:
94
92
; CHECK-NEXT: br label [[REGION_BARRIER]]
@@ -109,17 +107,16 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
109
107
; CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
110
108
; CHECK-NEXT: br label [[REGION_CHECK_TID5:%.*]]
111
109
; CHECK: region.check.tid5:
112
- ; CHECK-NEXT: [[TMP6 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
113
- ; CHECK-NEXT: [[TMP7 :%.*]] = icmp eq i32 [[TMP6 ]], 0
114
- ; CHECK-NEXT: br i1 [[TMP7 ]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
110
+ ; CHECK-NEXT: [[TMP4 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
111
+ ; CHECK-NEXT: [[TMP5 :%.*]] = icmp eq i32 [[TMP4 ]], 0
112
+ ; CHECK-NEXT: br i1 [[TMP5 ]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
115
113
; CHECK: region.guarded4:
116
- ; CHECK-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[ARRAYIDX5_I]] to ptr addrspace(1)
117
- ; CHECK-NEXT: store i32 [[SUB3_I]], ptr addrspace(1) [[TMP8]], align 4, !noalias [[META7]]
114
+ ; CHECK-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
118
115
; CHECK-NEXT: br label [[REGION_GUARDED_END1:%.*]]
119
116
; CHECK: region.guarded.end1:
120
117
; CHECK-NEXT: br label [[REGION_BARRIER2]]
121
118
; CHECK: region.barrier2:
122
- ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6 ]])
119
+ ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP4 ]])
123
120
; CHECK-NEXT: br label [[REGION_EXIT3]]
124
121
; CHECK: region.exit3:
125
122
; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
@@ -131,53 +128,50 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
131
128
; CHECK-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
132
129
; CHECK-NEXT: br label [[REGION_CHECK_TID10:%.*]]
133
130
; CHECK: region.check.tid10:
134
- ; CHECK-NEXT: [[TMP9 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
135
- ; CHECK-NEXT: [[TMP10 :%.*]] = icmp eq i32 [[TMP9 ]], 0
136
- ; CHECK-NEXT: br i1 [[TMP10 ]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
131
+ ; CHECK-NEXT: [[TMP6 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
132
+ ; CHECK-NEXT: [[TMP7 :%.*]] = icmp eq i32 [[TMP6 ]], 0
133
+ ; CHECK-NEXT: br i1 [[TMP7 ]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
137
134
; CHECK: region.guarded9:
138
- ; CHECK-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[ARRAYIDX7_I]] to ptr addrspace(1)
139
- ; CHECK-NEXT: store i32 [[CALL_I]], ptr addrspace(1) [[TMP11]], align 4, !noalias [[META7]]
135
+ ; CHECK-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
140
136
; CHECK-NEXT: br label [[REGION_GUARDED_END6:%.*]]
141
137
; CHECK: region.guarded.end6:
142
138
; CHECK-NEXT: br label [[REGION_BARRIER7]]
143
139
; CHECK: region.barrier7:
144
- ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP9 ]])
140
+ ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6 ]])
145
141
; CHECK-NEXT: br label [[REGION_EXIT8:%.*]]
146
142
; CHECK: region.exit8:
147
143
; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
148
144
; CHECK-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64
149
145
; CHECK-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
150
146
; CHECK-NEXT: br label [[REGION_CHECK_TID15:%.*]]
151
147
; CHECK: region.check.tid15:
152
- ; CHECK-NEXT: [[TMP12 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
153
- ; CHECK-NEXT: [[TMP13 :%.*]] = icmp eq i32 [[TMP12 ]], 0
154
- ; CHECK-NEXT: br i1 [[TMP13 ]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
148
+ ; CHECK-NEXT: [[TMP8 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
149
+ ; CHECK-NEXT: [[TMP9 :%.*]] = icmp eq i32 [[TMP8 ]], 0
150
+ ; CHECK-NEXT: br i1 [[TMP9 ]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
155
151
; CHECK: region.guarded14:
156
- ; CHECK-NEXT: [[TMP14:%.*]] = addrspacecast ptr [[ARRAYIDX10_I]] to ptr addrspace(1)
157
- ; CHECK-NEXT: store i32 [[CALL8_I]], ptr addrspace(1) [[TMP14]], align 4, !noalias [[META7]]
152
+ ; CHECK-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
158
153
; CHECK-NEXT: br label [[REGION_GUARDED_END11:%.*]]
159
154
; CHECK: region.guarded.end11:
160
155
; CHECK-NEXT: br label [[REGION_BARRIER12]]
161
156
; CHECK: region.barrier12:
162
- ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP12 ]])
157
+ ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP8 ]])
163
158
; CHECK-NEXT: br label [[REGION_EXIT13:%.*]]
164
159
; CHECK: region.exit13:
165
160
; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
166
161
; CHECK-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64
167
162
; CHECK-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
168
163
; CHECK-NEXT: br label [[REGION_CHECK_TID20:%.*]]
169
164
; CHECK: region.check.tid20:
170
- ; CHECK-NEXT: [[TMP15 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
171
- ; CHECK-NEXT: [[TMP16 :%.*]] = icmp eq i32 [[TMP15 ]], 0
172
- ; CHECK-NEXT: br i1 [[TMP16 ]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
165
+ ; CHECK-NEXT: [[TMP10 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
166
+ ; CHECK-NEXT: [[TMP11 :%.*]] = icmp eq i32 [[TMP10 ]], 0
167
+ ; CHECK-NEXT: br i1 [[TMP11 ]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
173
168
; CHECK: region.guarded19:
174
- ; CHECK-NEXT: [[TMP17:%.*]] = addrspacecast ptr [[ARRAYIDX13_I]] to ptr addrspace(1)
175
- ; CHECK-NEXT: store i32 [[CALL11_I]], ptr addrspace(1) [[TMP17]], align 4, !noalias [[META7]]
169
+ ; CHECK-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
176
170
; CHECK-NEXT: br label [[REGION_GUARDED_END16:%.*]]
177
171
; CHECK: region.guarded.end16:
178
172
; CHECK-NEXT: br label [[REGION_BARRIER17]]
179
173
; CHECK: region.barrier17:
180
- ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP15 ]])
174
+ ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP10 ]])
181
175
; CHECK-NEXT: br label [[REGION_EXIT18:%.*]]
182
176
; CHECK: region.exit18:
183
177
; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
@@ -238,13 +232,11 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
238
232
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
239
233
; CHECK-DISABLED-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
240
234
; CHECK-DISABLED-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
241
- ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
242
- ; CHECK-DISABLED-NEXT: store i32 1, ptr addrspace(1) [[TMP2]], align 4, !noalias [[META7]]
235
+ ; CHECK-DISABLED-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
243
236
; CHECK-DISABLED-NEXT: [[SEXT:%.*]] = shl i64 [[N]], 32
244
237
; CHECK-DISABLED-NEXT: [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32
245
238
; CHECK-DISABLED-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]]
246
- ; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
247
- ; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP3]], align 4, !noalias [[META7]]
239
+ ; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]]
248
240
; CHECK-DISABLED-NEXT: call void @usei8ptr(ptr captures(none) [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
249
241
; CHECK-DISABLED-NEXT: br label [[FOR_COND_I:%.*]]
250
242
; CHECK-DISABLED: for.cond.i:
@@ -256,27 +248,23 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
256
248
; CHECK-DISABLED-NEXT: [[SUB3_I:%.*]] = add nsw i32 [[I_0_I]], -1
257
249
; CHECK-DISABLED-NEXT: [[IDXPROM4_I:%.*]] = zext i32 [[I_0_I]] to i64
258
250
; CHECK-DISABLED-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
259
- ; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX5_I]] to ptr addrspace(1)
260
- ; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
251
+ ; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
261
252
; CHECK-DISABLED-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
262
253
; CHECK-DISABLED-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP10:![0-9]+]]
263
254
; CHECK-DISABLED: __omp_outlined__.exit:
264
255
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr null, i64 0)
265
256
; CHECK-DISABLED-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META7]]
266
257
; CHECK-DISABLED-NEXT: [[IDXPROM6_I:%.*]] = sext i32 [[CALL_I]] to i64
267
258
; CHECK-DISABLED-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
268
- ; CHECK-DISABLED-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX7_I]] to ptr addrspace(1)
269
- ; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
259
+ ; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
270
260
; CHECK-DISABLED-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
271
261
; CHECK-DISABLED-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64
272
262
; CHECK-DISABLED-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
273
- ; CHECK-DISABLED-NEXT: [[TMP6:%.*]] = addrspacecast ptr [[ARRAYIDX10_I]] to ptr addrspace(1)
274
- ; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr addrspace(1) [[TMP6]], align 4, !noalias [[META7]]
263
+ ; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
275
264
; CHECK-DISABLED-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
276
265
; CHECK-DISABLED-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64
277
266
; CHECK-DISABLED-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
278
- ; CHECK-DISABLED-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[ARRAYIDX13_I]] to ptr addrspace(1)
279
- ; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr addrspace(1) [[TMP7]], align 4, !noalias [[META7]]
267
+ ; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
280
268
; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
281
269
; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
282
270
; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
0 commit comments