Skip to content

Commit db0caa6

Browse files
committed
remove static from calculate_offset fn and use ceil_div
1 parent 22dbcdf commit db0caa6

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

ggml/src/ggml-sycl/common.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -472,10 +472,10 @@ static __dpct_inline__ float warp_reduce_max(float x,
472472
return x;
473473
}
474474

475-
/* Helper for Computing the linear offset into an 4-dimensional ggml_tensor given
475+
/* Helper for computing the linear offset into a ggml_tensor given
476476
per-dimension strides and indices */
477477
template<int N>
478-
static __dpct_inline__ size_t calculate_offset(const std::array<int, N> & strides, const std::array<int, N> & indices) {
478+
__dpct_inline__ size_t calculate_offset(const std::array<int, N> & strides, const std::array<int, N> & indices) {
479479
size_t offset = 0;
480480
#pragma unroll
481481
for (int i = 0; i < N; i++) {

ggml/src/ggml-sycl/norm.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "norm.hpp"
22
#include "ggml-sycl/common.hpp"
3+
#include "ggml-sycl/presets.hpp"
34

45
static void norm_f32(const float* x, float* dst, const int ncols, const int64_t stride_row, const int64_t stride_channel,
56
const int64_t stride_sample, const float eps, const sycl::nd_item<3>& item_ct1, sycl::float2* s_sum, int block_size) {
@@ -40,7 +41,7 @@ static void norm_f32(const float* x, float* dst, const int ncols, const int64_t
4041
}
4142
item_ct1.barrier(sycl::access::fence_space::local_space);
4243
mean_var = 0.f;
43-
const size_t nreduce = (nwarps + WARP_SIZE - 1) / WARP_SIZE;
44+
const size_t nreduce = ceil_div(nwarps, WARP_SIZE);
4445
for (size_t i = 0; i < nreduce; i += 1)
4546
{
4647
mean_var += s_sum[wi_in_sg + i * WARP_SIZE];
@@ -184,7 +185,7 @@ static void rms_norm_f32(const float* x, float* dst, const int ncols, const int6
184185
}
185186

186187
item_ct1.barrier(sycl::access::fence_space::local_space);
187-
const size_t nreduce = (nwarps + WARP_SIZE - 1) / WARP_SIZE;
188+
const size_t nreduce = ceil_div(nwarps, WARP_SIZE);
188189
tmp = 0.f;
189190
for (size_t i = 0; i < nreduce; i += 1)
190191
{

0 commit comments

Comments
 (0)