Commit 6744dbe

ggml : use ggml_row_size where possible (#4472)
* ggml : use ggml_row_size where possible

ggml-ci

* ggml : move ggml_nbytes_split to ggml-cuda.cu
1 parent cafcd4f commit 6744dbe
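
For context: quantized ggml types store elements in blocks of ggml_blck_size(type) elements, with each block occupying ggml_type_size(type) bytes, and ggml_row_size(type, ne) returns the number of bytes needed for ne such elements. A minimal sketch of the computation this commit standardizes on (an illustration, not the verbatim ggml.c definition):

    #include "ggml.h"

    // Bytes needed to store ne elements of the given type in one contiguous
    // row; ne is expected to be a whole number of blocks.
    static size_t row_size_sketch(enum ggml_type type, int64_t ne) {
        return ggml_type_size(type)*ne/ggml_blck_size(type);
    }

Centralizing this in one helper removes a family of hand-written size expressions that were easy to get subtly wrong.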

File tree

5 files changed (+24, -26 lines)


ggml-cuda.cu

Lines changed: 8 additions & 4 deletions
@@ -8898,6 +8898,12 @@ static void ggml_cuda_nop(const ggml_tensor * src0, const ggml_tensor * src1, gg
     (void) dst;
 }
 
+static size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
+    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
+
+    return nrows_split*ggml_row_size(tensor->type, tensor->ne[0]);
+}
+
 void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor) {
     const int64_t nrows = ggml_nrows(tensor);
 

@@ -8947,8 +8953,7 @@ void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor) {
 
     // pad last row to a multiple of 512 elements to avoid out-of-bounds memory accesses
     if (ne0 % MATRIX_ROW_PADDING != 0) {
-        size += (MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING)
-            * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
+        size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
     }
 
     char * buf;

@@ -9485,8 +9490,7 @@ static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_t
 
     if (ggml_is_quantized(tensor->type)) {
         if (ne0 % MATRIX_ROW_PADDING != 0) {
-            size += (MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING)
-                * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
+            size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
         }
     }
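
Both padding hunks in this file compute the extra bytes needed so that kernels reading whole quantized blocks past ne0 stay in bounds. A sketch of the resulting allocation-size logic, using hypothetical names; MATRIX_ROW_PADDING is assumed to be the 512-element constant referenced in the comment above:

    #include "ggml.h"

    #define MATRIX_ROW_PADDING 512 // assumed value of the ggml-cuda.cu constant

    // Illustration only: allocation size for a quantized tensor whose last row
    // is padded up to a multiple of MATRIX_ROW_PADDING elements.
    static size_t padded_nbytes(const struct ggml_tensor * tensor) {
        const int64_t ne0 = tensor->ne[0];
        size_t size = ggml_nbytes(tensor);
        if (ne0 % MATRIX_ROW_PADDING != 0) {
            // padding element count converted to bytes via the same helper
            size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
        }
        return size;
    }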

ggml.c

Lines changed: 6 additions & 12 deletions
@@ -1997,12 +1997,6 @@ size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
     return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
 }
 
-size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
-    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
-
-    return (nrows_split*tensor->ne[0]*ggml_type_size(tensor->type))/ggml_blck_size(tensor->type);
-}
-
 int ggml_blck_size(enum ggml_type type) {
     return type_traits[type].blck_size;
 }

@@ -2491,7 +2485,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         view_src = view_src->view_src;
     }
 
-    size_t data_size = ggml_type_size(type)*(ne[0]/ggml_blck_size(type));
+    size_t data_size = ggml_row_size(type, ne[0]);
     for (int i = 1; i < n_dims; i++) {
         data_size *= ne[i];
     }

@@ -9698,7 +9692,7 @@ static void ggml_compute_forward_mul_mat(
     if (params->type == GGML_TASK_INIT) {
         if (src1->type != vec_dot_type) {
             char * wdata = params->wdata;
-            const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);
+            const size_t row_size = ggml_row_size(vec_dot_type, ne10);
 
             assert(params->wsize >= ne11*ne12*ne13*row_size);
             assert(src1->type == GGML_TYPE_F32);

@@ -9721,7 +9715,7 @@ static void ggml_compute_forward_mul_mat(
     }
 
     const void * wdata    = (src1->type == vec_dot_type) ? src1->data : params->wdata;
-    const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);
+    const size_t row_size = ggml_row_size(vec_dot_type, ne10);
 
     const int64_t nr0 = ne01;           // src0 rows
     const int64_t nr1 = cne1*ne12*ne13; // src1 rows

@@ -16326,7 +16320,7 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
                 } else
 #endif
                 if (node->src[1]->type != vec_dot_type) {
-                    cur = ggml_type_size(vec_dot_type)*ggml_nelements(node->src[1])/ggml_blck_size(vec_dot_type);
+                    cur = ggml_row_size(vec_dot_type, ggml_nelements(node->src[1]));
                 }
             } break;
         case GGML_OP_MUL_MAT_ID:

@@ -16343,7 +16337,7 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
                 } else
 #endif
                 if (b->type != vec_dot_type) {
-                    cur = ggml_type_size(vec_dot_type)*ggml_nelements(b)/ggml_blck_size(vec_dot_type);
+                    cur = ggml_row_size(vec_dot_type, ggml_nelements(b));
                 }
             } break;
         case GGML_OP_OUT_PROD:

@@ -18703,7 +18697,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
             return NULL;
         }
 
-        const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
+        const size_t size_cur = ggml_row_size(info->type, ne);
 
         ctx->size += GGML_PAD(size_cur, ctx->alignment);
     }
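
The ggml_new_tensor_impl hunk is worth a second look: the old expression divided ne[0] by the block size before multiplying, while ggml_row_size multiplies first, and the two agree because ne[0] must be a whole number of blocks for quantized types. A sketch of the resulting data-size computation (tensor_data_size is an illustrative name, not a ggml API):

    #include "ggml.h"

    // Illustration only: total data size of an n_dims tensor after this change.
    // Only dimension 0 can be block-quantized, so the higher dimensions scale
    // the row size element-wise.
    static size_t tensor_data_size(enum ggml_type type, int n_dims, const int64_t * ne) {
        size_t data_size = ggml_row_size(type, ne[0]);
        for (int i = 1; i < n_dims; i++) {
            data_size *= (size_t) ne[i];
        }
        return data_size;
    }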

ggml.h

Lines changed: 0 additions & 1 deletion
@@ -638,7 +638,6 @@ extern "C" {
     GGML_API int64_t ggml_nrows       (const struct ggml_tensor * tensor);
     GGML_API size_t  ggml_nbytes      (const struct ggml_tensor * tensor);
     GGML_API size_t  ggml_nbytes_pad  (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
-    GGML_API size_t  ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
 
     GGML_API int     ggml_blck_size(enum ggml_type type);
     GGML_API size_t  ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block

tests/test-backend-ops.cpp

Lines changed: 5 additions & 4 deletions
@@ -54,7 +54,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
         ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float));
     } else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16) {
         GGML_ASSERT(size % ggml_blck_size(tensor->type) == 0);
-        std::vector<uint8_t> dataq(ggml_type_size(tensor->type)*size/ggml_blck_size(tensor->type));
+        std::vector<uint8_t> dataq(ggml_row_size(tensor->type, size));
         int64_t hist[16];
         ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size, hist);
         ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());

@@ -72,6 +72,8 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
 
     ggml_type_traits_t tt = ggml_internal_get_type_traits(t->type);
     size_t bs = ggml_blck_size(t->type);
+    std::vector<float> vq(ggml_blck_size(t->type));
+    bool quantized = ggml_is_quantized(t->type);
 
     // access elements by index to avoid gaps in views
     for (int64_t i3 = 0; i3 < t->ne[3]; i3++) {

@@ -85,9 +87,8 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
                    tv.push_back(*(float *) &buf[i]);
                } else if (t->type == GGML_TYPE_I32) {
                    tv.push_back((float)*(int32_t *) &buf[i]);
-               } else if (ggml_is_quantized(t->type)) {
-                   std::vector<float> vq(ggml_blck_size(t->type));
-                   tt.to_float(&buf[i], vq.data(), ggml_blck_size(t->type));
+               } else if (quantized) {
+                   tt.to_float(&buf[i], vq.data(), bs);
                    tv.insert(tv.end(), vq.begin(), vq.end());
                } else {
                    GGML_ASSERT(false);
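
Beyond the ggml_row_size substitution, the tensor_to_float hunks hoist the vq scratch vector and the ggml_is_quantized check out of the innermost element loop, so dequantizing each block no longer allocates a fresh std::vector or re-queries the type on every iteration.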

tests/test-quantize-perf.cpp

Lines changed: 5 additions & 5 deletions
@@ -286,7 +286,7 @@ int main(int argc, char * argv[]) {
                 qfns.from_float_reference(test_data1, test_q1, size);
                 return test_q1[0];
             };
-            size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
+            size_t quantized_size = ggml_row_size(type, size);
             benchmark_function(size, quantized_size, iterations, quantize_fn);
         }
         printf("\n");

@@ -300,7 +300,7 @@ int main(int argc, char * argv[]) {
                 qfns.from_float(test_data1, test_q1, size);
                 return test_q1[0];
             };
-            size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
+            size_t quantized_size = ggml_row_size(type, size);
             benchmark_function(size, quantized_size, iterations, quantize_fn);
         }
         printf("\n");

@@ -315,7 +315,7 @@ int main(int argc, char * argv[]) {
                 qfns.to_float(test_q1, test_out, size);
                 return test_out[0];
             };
-            size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
+            size_t quantized_size = ggml_row_size(type, size);
             benchmark_function(size, quantized_size, iterations, quantize_fn);
         }
         printf("\n");

@@ -330,7 +330,7 @@ int main(int argc, char * argv[]) {
                 vdot.from_float(test_data1, test_q1, size);
                 return test_q1[0];
             };
-            size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
+            size_t quantized_size = ggml_row_size(type, size);
             benchmark_function(size, quantized_size, iterations, quantize_fn);
         }
         printf("\n");

@@ -347,7 +347,7 @@ int main(int argc, char * argv[]) {
                 qfns.vec_dot(size, &result, test_q1, test_q2);
                 return result;
             };
-            size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
+            size_t quantized_size = ggml_row_size(type, size);
             benchmark_function(size, quantized_size, iterations, quantize_fn);
         }
         printf("\n");
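
All five hunks here are the same substitution. One subtlety: the old expression size / ggml_blck_size(type) * ggml_type_size(type) divides before multiplying, while ggml_row_size multiplies first, so the two only agree when size is a whole number of blocks, which the benchmark sizes are presumably chosen to be.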
