@@ -215,16 +215,17 @@ inclusive_scan_base_step(sycl::queue &exec_q,
215
215
const size_t gid = it.get_global_id (0 );
216
216
const size_t lid = it.get_local_id (0 );
217
217
218
- const size_t iter_gid = gid / (acc_groups * wg_size);
219
- const size_t chunk_gid = gid - (iter_gid * acc_groups * wg_size);
218
+ const size_t reduce_chunks = acc_groups * wg_size;
219
+ const size_t iter_gid = gid / reduce_chunks;
220
+ const size_t chunk_gid = gid - (iter_gid * reduce_chunks);
220
221
221
- std::array<outputT, n_wi> local_iscan;
222
-
223
- size_t i = chunk_gid * n_wi;
222
+ const size_t i = chunk_gid * n_wi;
224
223
const auto &iter_offsets = iter_indexer (iter_gid);
225
224
const auto &inp_iter_offset = iter_offsets.get_first_offset ();
226
225
const auto &out_iter_offset = iter_offsets.get_second_offset ();
227
226
227
+ std::array<outputT, n_wi> local_iscan;
228
+
228
229
#pragma unroll
229
230
for (nwiT m_wi = 0 ; m_wi < n_wi; ++m_wi) {
230
231
const size_t i_m_wi = i + m_wi;
@@ -324,7 +325,7 @@ sycl::event inclusive_scan_iter_1d(sycl::queue &exec_q,
324
325
std::vector<sycl::event> &host_tasks,
325
326
const std::vector<sycl::event> &depends = {})
326
327
{
327
- ScanOpT scan_op = ScanOpT () ;
328
+ ScanOpT scan_op{} ;
328
329
constexpr outputT identity = su_ns::Identity<ScanOpT, outputT>::value;
329
330
330
331
constexpr size_t _iter_nelems = 1 ;
@@ -352,9 +353,9 @@ sycl::event inclusive_scan_iter_1d(sycl::queue &exec_q,
352
353
size_t n_groups_ = n_groups;
353
354
size_t temp_size = 0 ;
354
355
while (n_groups_ > 1 ) {
355
- const auto this_size = (n_groups_ - 1 );
356
+ const size_t this_size = (n_groups_ - 1 );
356
357
temp_size += this_size;
357
- n_groups_ = ceiling_quotient< size_t > (this_size, chunk_size);
358
+ n_groups_ = ceiling_quotient (this_size, chunk_size);
358
359
}
359
360
360
361
// allocate
0 commit comments