File tree Expand file tree Collapse file tree 1 file changed +9
-10
lines changed
dpctl/tensor/libtensor/include/kernels Expand file tree Collapse file tree 1 file changed +9
-10
lines changed Original file line number Diff line number Diff line change @@ -227,23 +227,22 @@ inclusive_scan_base_step(sycl::queue &exec_q,
227
227
228
228
#pragma unroll
229
229
for (nwiT m_wi = 0 ; m_wi < n_wi; ++m_wi) {
230
+ const size_t i_m_wi = i + m_wi;
230
231
if constexpr (!include_initial) {
231
232
local_iscan[m_wi] =
232
- (i + m_wi < acc_nelems)
233
- ? transformer (
234
- input[inp_iter_offset +
235
- inp_indexer (s0 + s1 * (i + m_wi))])
233
+ (i_m_wi < acc_nelems)
234
+ ? transformer (input[inp_iter_offset +
235
+ inp_indexer (s0 + s1 * i_m_wi)])
236
236
: identity;
237
237
}
238
238
else {
239
239
// shift input to the left by a single element relative to
240
240
// output
241
241
local_iscan[m_wi] =
242
- (i + m_wi < acc_nelems && i + m_wi > 0 )
242
+ (i_m_wi < acc_nelems && i_m_wi > 0 )
243
243
? transformer (
244
244
input[inp_iter_offset +
245
- inp_indexer ((s0 + s1 * (i + m_wi)) -
246
- 1 )])
245
+ inp_indexer ((s0 + s1 * i_m_wi) - 1 )])
247
246
: identity;
248
247
}
249
248
}
@@ -280,9 +279,9 @@ inclusive_scan_base_step(sycl::queue &exec_q,
280
279
local_iscan[m_wi] = scan_op (local_iscan[m_wi], addand);
281
280
}
282
281
283
- for ( nwiT m_wi = 0 ; (m_wi < n_wi) && (i + m_wi < acc_nelems);
284
- ++m_wi)
285
- {
282
+ const nwiT m_max =
283
+ std::min<nwiT>(n_wi, std::max (i, acc_nelems) - i);
284
+ for (nwiT m_wi = 0 ; m_wi < m_max; ++m_wi) {
286
285
output[out_iter_offset + out_indexer (i + m_wi)] =
287
286
local_iscan[m_wi];
288
287
}
You can’t perform that action at this time.
0 commit comments