@@ -574,7 +574,7 @@ template <typename inputT,
574
574
typename ScanOpT,
575
575
bool include_initial>
576
576
sycl::event inclusive_scan_iter_1d (sycl::queue &exec_q,
577
- const std::size_t wg_size,
577
+ const std::uint32_t wg_size,
578
578
const std::size_t n_elems,
579
579
const inputT *input,
580
580
outputT *output,
@@ -768,7 +768,7 @@ accumulate_1d_contig_impl(sycl::queue &q,
768
768
const sycl::device &dev = q.get_device ();
769
769
if (dev.has (sycl::aspect::cpu)) {
770
770
constexpr nwiT n_wi_for_cpu = 8 ;
771
- const std::size_t wg_size = 256 ;
771
+ const std::uint32_t wg_size = 256 ;
772
772
comp_ev = inclusive_scan_iter_1d<srcT, dstT, n_wi_for_cpu, NoOpIndexerT,
773
773
transformerT, AccumulateOpT,
774
774
include_initial>(
@@ -779,7 +779,7 @@ accumulate_1d_contig_impl(sycl::queue &q,
779
779
constexpr nwiT n_wi_for_gpu = 4 ;
780
780
// base_scan_striped algorithm does not execute correctly
781
781
// on HIP device with wg_size > 64
782
- const std::size_t wg_size =
782
+ const std::uint32_t wg_size =
783
783
(q.get_backend () == sycl::backend::ext_oneapi_hip) ? 64 : 256 ;
784
784
comp_ev = inclusive_scan_iter_1d<srcT, dstT, n_wi_for_gpu, NoOpIndexerT,
785
785
transformerT, AccumulateOpT,
@@ -812,7 +812,7 @@ template <typename inputT,
812
812
typename ScanOpT,
813
813
bool include_initial>
814
814
sycl::event inclusive_scan_iter (sycl::queue &exec_q,
815
- const std::size_t wg_size,
815
+ const std::uint32_t wg_size,
816
816
const std::size_t iter_nelems,
817
817
const std::size_t acc_nelems,
818
818
const inputT *input,
@@ -1173,7 +1173,7 @@ accumulate_strided_impl(sycl::queue &q,
1173
1173
sycl::event comp_ev;
1174
1174
if (dev.has (sycl::aspect::cpu)) {
1175
1175
constexpr nwiT n_wi_for_cpu = 8 ;
1176
- const std::size_t wg_size = 256 ;
1176
+ const std::uint32_t wg_size = 256 ;
1177
1177
comp_ev =
1178
1178
inclusive_scan_iter<srcT, dstT, n_wi_for_cpu, InpIndexerT,
1179
1179
OutIndexerT, InpIndexerT, OutIndexerT,
@@ -1186,7 +1186,7 @@ accumulate_strided_impl(sycl::queue &q,
1186
1186
constexpr nwiT n_wi_for_gpu = 4 ;
1187
1187
// base_scan_striped algorithm does not execute correctly
1188
1188
// on HIP device with wg_size > 64
1189
- const std::size_t wg_size =
1189
+ const std::uint32_t wg_size =
1190
1190
(q.get_backend () == sycl::backend::ext_oneapi_hip) ? 64 : 256 ;
1191
1191
comp_ev =
1192
1192
inclusive_scan_iter<srcT, dstT, n_wi_for_gpu, InpIndexerT,
@@ -1232,7 +1232,7 @@ std::size_t cumsum_val_contig_impl(sycl::queue &q,
1232
1232
const sycl::device &dev = q.get_device ();
1233
1233
if (dev.has (sycl::aspect::cpu)) {
1234
1234
constexpr nwiT n_wi_for_cpu = 8 ;
1235
- const std::size_t wg_size = 256 ;
1235
+ const std::uint32_t wg_size = 256 ;
1236
1236
comp_ev = inclusive_scan_iter_1d<maskT, cumsumT, n_wi_for_cpu,
1237
1237
NoOpIndexerT, transformerT,
1238
1238
AccumulateOpT, include_initial>(
@@ -1243,7 +1243,7 @@ std::size_t cumsum_val_contig_impl(sycl::queue &q,
1243
1243
constexpr nwiT n_wi_for_gpu = 4 ;
1244
1244
// base_scan_striped algorithm does not execute correctly
1245
1245
// on HIP device with wg_size > 64
1246
- const std::size_t wg_size =
1246
+ const std::uint32_t wg_size =
1247
1247
(q.get_backend () == sycl::backend::ext_oneapi_hip) ? 64 : 256 ;
1248
1248
comp_ev = inclusive_scan_iter_1d<maskT, cumsumT, n_wi_for_gpu,
1249
1249
NoOpIndexerT, transformerT,
@@ -1346,7 +1346,7 @@ cumsum_val_strided_impl(sycl::queue &q,
1346
1346
sycl::event comp_ev;
1347
1347
if (dev.has (sycl::aspect::cpu)) {
1348
1348
constexpr nwiT n_wi_for_cpu = 8 ;
1349
- const std::size_t wg_size = 256 ;
1349
+ const std::uint32_t wg_size = 256 ;
1350
1350
comp_ev = inclusive_scan_iter_1d<maskT, cumsumT, n_wi_for_cpu,
1351
1351
StridedIndexerT, transformerT,
1352
1352
AccumulateOpT, include_initial>(
@@ -1357,7 +1357,7 @@ cumsum_val_strided_impl(sycl::queue &q,
1357
1357
constexpr nwiT n_wi_for_gpu = 4 ;
1358
1358
// base_scan_striped algorithm does not execute correctly
1359
1359
// on HIP device with wg_size > 64
1360
- const std::size_t wg_size =
1360
+ const std::uint32_t wg_size =
1361
1361
(q.get_backend () == sycl::backend::ext_oneapi_hip) ? 64 : 256 ;
1362
1362
comp_ev = inclusive_scan_iter_1d<maskT, cumsumT, n_wi_for_gpu,
1363
1363
StridedIndexerT, transformerT,
0 commit comments