Skip to content

Commit 8edf7f3

Browse files
authored
Merge pull request #1950 from IntelPython/use-std-size-t-cstddef
[MAINT] Use `std::size_t` from `cstddef` and use `dpctl::tensor::ssize_t` where `ssize_t` is used
2 parents 3a1a7c5 + 919d772 commit 8edf7f3

File tree

154 files changed

+2124
-1860
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

154 files changed

+2124
-1860
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
4242
* Add support of CV-qualifiers in `is_complex<T>` helper [gh-1900](https://github.com/IntelPython/dpctl/pull/1900)
4343
* Tuning work for elementwise functions with modest performance gains (under 10%) [gh-1889](https://github.com/IntelPython/dpctl/pull/1889)
4444
* Support for Python 3.13 for `dpctl` [gh-1941](https://github.com/IntelPython/dpctl/pull/1941)
45+
* Change libtensor to use `std::size_t` and `dpctl::tensor::ssize_t` throughout and fix missing includes for `std::size_t` and `size_t` [gh-1950](https://github.com/IntelPython/dpctl/pull/1950)
4546

4647
## [0.18.3] - Dec. 07, 2024
4748

dpctl/_host_task_util.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
#pragma once
3333
#include <exception>
34+
#include <stddef.h>
3435
#include <sycl/sycl.hpp>
3536

3637
#include "Python.h"

dpctl/apis/include/dpctl4pybind11.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@
2727

2828
#include "dpctl_capi.h"
2929
#include <complex>
30+
#include <cstddef> // for std::size_t for C++ linkage
3031
#include <memory>
3132
#include <pybind11/pybind11.h>
33+
#include <stddef.h> // for size_t for C linkage
3234
#include <stdexcept>
3335
#include <sycl/sycl.hpp>
3436
#include <utility>
@@ -759,7 +761,7 @@ class usm_memory : public py::object
759761
* lifetime of the USM allocation.
760762
*/
761763
usm_memory(void *usm_ptr,
762-
size_t nbytes,
764+
std::size_t nbytes,
763765
const sycl::queue &q,
764766
std::shared_ptr<void> shptr)
765767
{
@@ -819,7 +821,7 @@ class usm_memory : public py::object
819821
return reinterpret_cast<char *>(MRef);
820822
}
821823

822-
size_t get_nbytes() const
824+
std::size_t get_nbytes() const
823825
{
824826
auto const &api = ::dpctl::detail::dpctl_capi::get();
825827
Py_MemoryObject *mem_obj = reinterpret_cast<Py_MemoryObject *>(m_ptr);

dpctl/tensor/libtensor/include/kernels/accumulators.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
#pragma once
2626
#include <array>
27+
#include <cstddef>
2728
#include <cstdint>
2829
#include <limits>
2930
#include <new>
@@ -47,6 +48,7 @@ namespace kernels
4748
namespace accumulators
4849
{
4950

51+
using dpctl::tensor::ssize_t;
5052
using namespace dpctl::tensor::offset_utils;
5153

5254
/// Returns (n + m - 1) / m; for non-negative n and positive m this is the
/// quotient n / m rounded up to the next whole number.
template <typename T> T ceiling_quotient(T n, T m)
{
    // Hold the biased numerator in whatever type the arithmetic yields, so
    // the division sees exactly the same operands as the one-line form.
    const auto biased_numerator = n + m - 1;
    return biased_numerator / m;
}

dpctl/tensor/libtensor/include/kernels/alignment.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ namespace kernels
3030
namespace alignment_utils
3131
{
3232

33-
static constexpr size_t required_alignment = 64UL;
33+
static constexpr std::size_t required_alignment = 64UL;
3434

3535
template <std::uintptr_t alignment, typename Ptr> bool is_aligned(Ptr p)
3636
{

dpctl/tensor/libtensor/include/kernels/boolean_advanced_indexing.hpp

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
//===---------------------------------------------------------------------===//
2424

2525
#pragma once
26+
#include <cstddef>
2627
#include <cstdint>
2728
#include <limits>
2829
#include <sycl/sycl.hpp>
@@ -42,6 +43,7 @@ namespace kernels
4243
namespace indexing
4344
{
4445

46+
using dpctl::tensor::ssize_t;
4547
using namespace dpctl::tensor::offset_utils;
4648

4749
template <typename OrthogIndexerT,
@@ -55,7 +57,7 @@ struct MaskedExtractStridedFunctor
5557
MaskedExtractStridedFunctor(const dataT *src_data_p,
5658
const indT *cumsum_data_p,
5759
dataT *dst_data_p,
58-
size_t masked_iter_size,
60+
std::size_t masked_iter_size,
5961
const OrthogIndexerT &orthog_src_dst_indexer_,
6062
const MaskedSrcIndexerT &masked_src_indexer_,
6163
const MaskedDstIndexerT &masked_dst_indexer_,
@@ -81,7 +83,7 @@ struct MaskedExtractStridedFunctor
8183

8284
const std::size_t max_offset = masked_nelems + 1;
8385
for (std::uint32_t i = l_i; i < lacc.size(); i += lws) {
84-
const size_t offset = masked_block_start + i;
86+
const std::size_t offset = masked_block_start + i;
8587
lacc[i] = (offset == 0) ? indT(0)
8688
: (offset < max_offset) ? cumsum[offset - 1]
8789
: cumsum[masked_nelems - 1] + 1;
@@ -99,9 +101,10 @@ struct MaskedExtractStridedFunctor
99101
if (mask_set && (masked_i < masked_nelems)) {
100102
const auto &orthog_offsets = orthog_src_dst_indexer(orthog_i);
101103

102-
const size_t total_src_offset = masked_src_indexer(masked_i) +
103-
orthog_offsets.get_first_offset();
104-
const size_t total_dst_offset =
104+
const std::size_t total_src_offset =
105+
masked_src_indexer(masked_i) +
106+
orthog_offsets.get_first_offset();
107+
const std::size_t total_dst_offset =
105108
masked_dst_indexer(current_running_count - 1) +
106109
orthog_offsets.get_second_offset();
107110

@@ -113,7 +116,7 @@ struct MaskedExtractStridedFunctor
113116
const dataT *src = nullptr;
114117
const indT *cumsum = nullptr;
115118
dataT *dst = nullptr;
116-
const size_t masked_nelems = 0;
119+
const std::size_t masked_nelems = 0;
117120
// has nd, shape, src_strides, dst_strides for
118121
// dimensions that ARE NOT masked
119122
const OrthogIndexerT orthog_src_dst_indexer;
@@ -136,7 +139,7 @@ struct MaskedPlaceStridedFunctor
136139
MaskedPlaceStridedFunctor(dataT *dst_data_p,
137140
const indT *cumsum_data_p,
138141
const dataT *rhs_data_p,
139-
size_t masked_iter_size,
142+
std::size_t masked_iter_size,
140143
const OrthogIndexerT &orthog_dst_rhs_indexer_,
141144
const MaskedDstIndexerT &masked_dst_indexer_,
142145
const MaskedRhsIndexerT &masked_rhs_indexer_,
@@ -157,12 +160,12 @@ struct MaskedPlaceStridedFunctor
157160
const std::uint32_t l_i = ndit.get_local_id(1);
158161
const std::uint32_t lws = ndit.get_local_range(1);
159162

160-
const size_t masked_i = ndit.get_global_id(1);
161-
const size_t masked_block_start = masked_i - l_i;
163+
const std::size_t masked_i = ndit.get_global_id(1);
164+
const std::size_t masked_block_start = masked_i - l_i;
162165

163166
const std::size_t max_offset = masked_nelems + 1;
164167
for (std::uint32_t i = l_i; i < lacc.size(); i += lws) {
165-
const size_t offset = masked_block_start + i;
168+
const std::size_t offset = masked_block_start + i;
166169
lacc[i] = (offset == 0) ? indT(0)
167170
: (offset < max_offset) ? cumsum[offset - 1]
168171
: cumsum[masked_nelems - 1] + 1;
@@ -180,9 +183,10 @@ struct MaskedPlaceStridedFunctor
180183
if (mask_set && (masked_i < masked_nelems)) {
181184
const auto &orthog_offsets = orthog_dst_rhs_indexer(orthog_i);
182185

183-
const size_t total_dst_offset = masked_dst_indexer(masked_i) +
184-
orthog_offsets.get_first_offset();
185-
const size_t total_rhs_offset =
186+
const std::size_t total_dst_offset =
187+
masked_dst_indexer(masked_i) +
188+
orthog_offsets.get_first_offset();
189+
const std::size_t total_rhs_offset =
186190
masked_rhs_indexer(current_running_count - 1) +
187191
orthog_offsets.get_second_offset();
188192

@@ -194,7 +198,7 @@ struct MaskedPlaceStridedFunctor
194198
dataT *dst = nullptr;
195199
const indT *cumsum = nullptr;
196200
const dataT *rhs = nullptr;
197-
const size_t masked_nelems = 0;
201+
const std::size_t masked_nelems = 0;
198202
// has nd, shape, dst_strides, rhs_strides for
199203
// dimensions that ARE NOT masked
200204
const OrthogIndexerT orthog_dst_rhs_indexer;
@@ -450,8 +454,8 @@ sycl::event masked_extract_some_slices_strided_impl(
450454

451455
const std::size_t lws = get_lws(masked_extent);
452456

453-
const size_t n_groups = ((masked_extent + lws - 1) / lws);
454-
const size_t orthog_extent = static_cast<size_t>(orthog_nelems);
457+
const std::size_t n_groups = ((masked_extent + lws - 1) / lws);
458+
const std::size_t orthog_extent = static_cast<std::size_t>(orthog_nelems);
455459

456460
sycl::range<2> gRange{orthog_extent, n_groups * lws};
457461
sycl::range<2> lRange{1, lws};
@@ -809,7 +813,7 @@ sycl::event non_zero_indexes_impl(sycl::queue &exec_q,
809813
const std::size_t masked_block_start = group_i * lws;
810814

811815
for (std::uint32_t i = l_i; i < lacc.size(); i += lws) {
812-
const size_t offset = masked_block_start + i;
816+
const std::size_t offset = masked_block_start + i;
813817
lacc[i] = (offset == 0) ? indT1(0)
814818
: (offset - 1 < masked_extent)
815819
? cumsum_data[offset - 1]

dpctl/tensor/libtensor/include/kernels/clip.hpp

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#pragma once
2626
#include <algorithm>
2727
#include <complex>
28+
#include <cstddef>
2829
#include <cstdint>
2930
#include <sycl/sycl.hpp>
3031
#include <type_traits>
@@ -45,6 +46,7 @@ namespace kernels
4546
namespace clip
4647
{
4748

49+
using dpctl::tensor::ssize_t;
4850
using namespace dpctl::tensor::offset_utils;
4951

5052
using dpctl::tensor::kernels::alignment_utils::
@@ -85,14 +87,14 @@ template <typename T,
8587
class ClipContigFunctor
8688
{
8789
private:
88-
size_t nelems = 0;
90+
std::size_t nelems = 0;
8991
const T *x_p = nullptr;
9092
const T *min_p = nullptr;
9193
const T *max_p = nullptr;
9294
T *dst_p = nullptr;
9395

9496
public:
95-
ClipContigFunctor(size_t nelems_,
97+
ClipContigFunctor(std::size_t nelems_,
9698
const T *x_p_,
9799
const T *min_p_,
98100
const T *max_p_,
@@ -110,30 +112,30 @@ class ClipContigFunctor
110112
if constexpr (is_complex<T>::value || !enable_sg_loadstore) {
111113
const std::uint16_t sgSize =
112114
ndit.get_sub_group().get_local_range()[0];
113-
const size_t gid = ndit.get_global_linear_id();
114-
const uint16_t nelems_per_sg = sgSize * nelems_per_wi;
115+
const std::size_t gid = ndit.get_global_linear_id();
116+
const std::uint16_t nelems_per_sg = sgSize * nelems_per_wi;
115117

116-
const size_t start =
118+
const std::size_t start =
117119
(gid / sgSize) * (nelems_per_sg - sgSize) + gid;
118-
const size_t end = std::min(nelems, start + nelems_per_sg);
120+
const std::size_t end = std::min(nelems, start + nelems_per_sg);
119121

120-
for (size_t offset = start; offset < end; offset += sgSize) {
122+
for (std::size_t offset = start; offset < end; offset += sgSize) {
121123
dst_p[offset] = clip(x_p[offset], min_p[offset], max_p[offset]);
122124
}
123125
}
124126
else {
125127
auto sg = ndit.get_sub_group();
126128
const std::uint16_t sgSize = sg.get_max_local_range()[0];
127129

128-
const size_t base =
130+
const std::size_t base =
129131
nelems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) +
130132
sg.get_group_id()[0] * sgSize);
131133

132134
if (base + nelems_per_wi * sgSize < nelems) {
133135
sycl::vec<T, vec_sz> dst_vec;
134136
#pragma unroll
135137
for (std::uint8_t it = 0; it < n_vecs * vec_sz; it += vec_sz) {
136-
const size_t idx = base + it * sgSize;
138+
const std::size_t idx = base + it * sgSize;
137139
auto x_multi_ptr = sycl::address_space_cast<
138140
sycl::access::address_space::global_space,
139141
sycl::access::decorated::yes>(&x_p[idx]);
@@ -162,8 +164,8 @@ class ClipContigFunctor
162164
}
163165
}
164166
else {
165-
const size_t lane_id = sg.get_local_id()[0];
166-
for (size_t k = base + lane_id; k < nelems; k += sgSize) {
167+
const std::size_t lane_id = sg.get_local_id()[0];
168+
for (std::size_t k = base + lane_id; k < nelems; k += sgSize) {
167169
dst_p[k] = clip(x_p[k], min_p[k], max_p[k]);
168170
}
169171
}
@@ -175,7 +177,7 @@ template <typename T, int vec_sz, int n_vecs> class clip_contig_kernel;
175177

176178
typedef sycl::event (*clip_contig_impl_fn_ptr_t)(
177179
sycl::queue &,
178-
size_t,
180+
std::size_t,
179181
const char *,
180182
const char *,
181183
const char *,
@@ -184,7 +186,7 @@ typedef sycl::event (*clip_contig_impl_fn_ptr_t)(
184186

185187
template <typename T>
186188
sycl::event clip_contig_impl(sycl::queue &q,
187-
size_t nelems,
189+
std::size_t nelems,
188190
const char *x_cp,
189191
const char *min_cp,
190192
const char *max_cp,
@@ -199,10 +201,10 @@ sycl::event clip_contig_impl(sycl::queue &q,
199201
sycl::event clip_ev = q.submit([&](sycl::handler &cgh) {
200202
cgh.depends_on(depends);
201203

202-
size_t lws = 64;
204+
std::size_t lws = 64;
203205
constexpr std::uint8_t vec_sz = 4;
204206
constexpr std::uint8_t n_vecs = 2;
205-
const size_t n_groups =
207+
const std::size_t n_groups =
206208
((nelems + lws * n_vecs * vec_sz - 1) / (lws * n_vecs * vec_sz));
207209
const auto gws_range = sycl::range<1>(n_groups * lws);
208210
const auto lws_range = sycl::range<1>(lws);
@@ -258,7 +260,7 @@ template <typename T, typename IndexerT> class ClipStridedFunctor
258260

259261
void operator()(sycl::id<1> id) const
260262
{
261-
size_t gid = id[0];
263+
std::size_t gid = id[0];
262264
auto offsets = indexer(static_cast<ssize_t>(gid));
263265
dst_p[offsets.get_fourth_offset()] = clip(
264266
x_p[offsets.get_first_offset()], min_p[offsets.get_second_offset()],
@@ -270,7 +272,7 @@ template <typename T, typename IndexerT> class clip_strided_kernel;
270272

271273
typedef sycl::event (*clip_strided_impl_fn_ptr_t)(
272274
sycl::queue &,
273-
size_t,
275+
std::size_t,
274276
int,
275277
const char *,
276278
const char *,
@@ -285,7 +287,7 @@ typedef sycl::event (*clip_strided_impl_fn_ptr_t)(
285287

286288
template <typename T>
287289
sycl::event clip_strided_impl(sycl::queue &q,
288-
size_t nelems,
290+
std::size_t nelems,
289291
int nd,
290292
const char *x_cp,
291293
const char *min_cp,

0 commit comments

Comments
 (0)