Skip to content

Commit 93eba50

Browse files
Changed type of wg_size to uint32_t everywhere
1 parent 0d59c8f commit 93eba50

File tree

1 file changed

+10 -10 lines changed

1 file changed

+10 -10 lines changed

dpctl/tensor/libtensor/include/kernels/accumulators.hpp

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -574,7 +574,7 @@ template <typename inputT,
 574  574   typename ScanOpT,
 575  575   bool include_initial>
 576  576   sycl::event inclusive_scan_iter_1d(sycl::queue &exec_q,
 577      - const std::size_t wg_size,
      577 + const std::uint32_t wg_size,
 578  578   const std::size_t n_elems,
 579  579   const inputT *input,
 580  580   outputT *output,
@@ -768,7 +768,7 @@ accumulate_1d_contig_impl(sycl::queue &q,
 768  768   const sycl::device &dev = q.get_device();
 769  769   if (dev.has(sycl::aspect::cpu)) {
 770  770   constexpr nwiT n_wi_for_cpu = 8;
 771      - const std::size_t wg_size = 256;
      771 + const std::uint32_t wg_size = 256;
 772  772   comp_ev = inclusive_scan_iter_1d<srcT, dstT, n_wi_for_cpu, NoOpIndexerT,
 773  773   transformerT, AccumulateOpT,
 774  774   include_initial>(
@@ -779,7 +779,7 @@ accumulate_1d_contig_impl(sycl::queue &q,
 779  779   constexpr nwiT n_wi_for_gpu = 4;
 780  780   // base_scan_striped algorithm does not execute correctly
 781  781   // on HIP device with wg_size > 64
 782      - const std::size_t wg_size =
      782 + const std::uint32_t wg_size =
 783  783   (q.get_backend() == sycl::backend::ext_oneapi_hip) ? 64 : 256;
 784  784   comp_ev = inclusive_scan_iter_1d<srcT, dstT, n_wi_for_gpu, NoOpIndexerT,
 785  785   transformerT, AccumulateOpT,
@@ -812,7 +812,7 @@ template <typename inputT,
 812  812   typename ScanOpT,
 813  813   bool include_initial>
 814  814   sycl::event inclusive_scan_iter(sycl::queue &exec_q,
 815      - const std::size_t wg_size,
      815 + const std::uint32_t wg_size,
 816  816   const std::size_t iter_nelems,
 817  817   const std::size_t acc_nelems,
 818  818   const inputT *input,
@@ -1173,7 +1173,7 @@ accumulate_strided_impl(sycl::queue &q,
1173 1173   sycl::event comp_ev;
1174 1174   if (dev.has(sycl::aspect::cpu)) {
1175 1175   constexpr nwiT n_wi_for_cpu = 8;
1176      - const std::size_t wg_size = 256;
     1176 + const std::uint32_t wg_size = 256;
1177 1177   comp_ev =
1178 1178   inclusive_scan_iter<srcT, dstT, n_wi_for_cpu, InpIndexerT,
1179 1179   OutIndexerT, InpIndexerT, OutIndexerT,
@@ -1186,7 +1186,7 @@ accumulate_strided_impl(sycl::queue &q,
1186 1186   constexpr nwiT n_wi_for_gpu = 4;
1187 1187   // base_scan_striped algorithm does not execute correctly
1188 1188   // on HIP device with wg_size > 64
1189      - const std::size_t wg_size =
     1189 + const std::uint32_t wg_size =
1190 1190   (q.get_backend() == sycl::backend::ext_oneapi_hip) ? 64 : 256;
1191 1191   comp_ev =
1192 1192   inclusive_scan_iter<srcT, dstT, n_wi_for_gpu, InpIndexerT,
@@ -1232,7 +1232,7 @@ std::size_t cumsum_val_contig_impl(sycl::queue &q,
1232 1232   const sycl::device &dev = q.get_device();
1233 1233   if (dev.has(sycl::aspect::cpu)) {
1234 1234   constexpr nwiT n_wi_for_cpu = 8;
1235      - const std::size_t wg_size = 256;
     1235 + const std::uint32_t wg_size = 256;
1236 1236   comp_ev = inclusive_scan_iter_1d<maskT, cumsumT, n_wi_for_cpu,
1237 1237   NoOpIndexerT, transformerT,
1238 1238   AccumulateOpT, include_initial>(
@@ -1243,7 +1243,7 @@ std::size_t cumsum_val_contig_impl(sycl::queue &q,
1243 1243   constexpr nwiT n_wi_for_gpu = 4;
1244 1244   // base_scan_striped algorithm does not execute correctly
1245 1245   // on HIP device with wg_size > 64
1246      - const std::size_t wg_size =
     1246 + const std::uint32_t wg_size =
1247 1247   (q.get_backend() == sycl::backend::ext_oneapi_hip) ? 64 : 256;
1248 1248   comp_ev = inclusive_scan_iter_1d<maskT, cumsumT, n_wi_for_gpu,
1249 1249   NoOpIndexerT, transformerT,
@@ -1346,7 +1346,7 @@ cumsum_val_strided_impl(sycl::queue &q,
1346 1346   sycl::event comp_ev;
1347 1347   if (dev.has(sycl::aspect::cpu)) {
1348 1348   constexpr nwiT n_wi_for_cpu = 8;
1349      - const std::size_t wg_size = 256;
     1349 + const std::uint32_t wg_size = 256;
1350 1350   comp_ev = inclusive_scan_iter_1d<maskT, cumsumT, n_wi_for_cpu,
1351 1351   StridedIndexerT, transformerT,
1352 1352   AccumulateOpT, include_initial>(
@@ -1357,7 +1357,7 @@ cumsum_val_strided_impl(sycl::queue &q,
1357 1357   constexpr nwiT n_wi_for_gpu = 4;
1358 1358   // base_scan_striped algorithm does not execute correctly
1359 1359   // on HIP device with wg_size > 64
1360      - const std::size_t wg_size =
     1360 + const std::uint32_t wg_size =
1361 1361   (q.get_backend() == sycl::backend::ext_oneapi_hip) ? 64 : 256;
1362 1362   comp_ev = inclusive_scan_iter_1d<maskT, cumsumT, n_wi_for_gpu,
1363 1363   StridedIndexerT, transformerT,

0 commit comments

Comments
 (0)