Skip to content

ggml : use ggml_row_size where possible #4472

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -8898,6 +8898,12 @@ static void ggml_cuda_nop(const ggml_tensor * src0, const ggml_tensor * src1, gg
(void) dst;
}

static size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

return nrows_split*ggml_row_size(tensor->type, tensor->ne[0]);
}

void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor) {
const int64_t nrows = ggml_nrows(tensor);

Expand Down Expand Up @@ -8947,8 +8953,7 @@ void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor) {

// pad last row to a multiple of 512 elements to avoid out-of-bounds memory accesses
if (ne0 % MATRIX_ROW_PADDING != 0) {
size += (MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING)
* ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
}

char * buf;
Expand Down Expand Up @@ -9485,8 +9490,7 @@ static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_t

if (ggml_is_quantized(tensor->type)) {
if (ne0 % MATRIX_ROW_PADDING != 0) {
size += (MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING)
* ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
}
}

Expand Down
18 changes: 6 additions & 12 deletions ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -1997,12 +1997,6 @@ size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
}

size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

return (nrows_split*tensor->ne[0]*ggml_type_size(tensor->type))/ggml_blck_size(tensor->type);
}

int ggml_blck_size(enum ggml_type type) {
return type_traits[type].blck_size;
}
Expand Down Expand Up @@ -2491,7 +2485,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
view_src = view_src->view_src;
}

size_t data_size = ggml_type_size(type)*(ne[0]/ggml_blck_size(type));
size_t data_size = ggml_row_size(type, ne[0]);
for (int i = 1; i < n_dims; i++) {
data_size *= ne[i];
}
Expand Down Expand Up @@ -9698,7 +9692,7 @@ static void ggml_compute_forward_mul_mat(
if (params->type == GGML_TASK_INIT) {
if (src1->type != vec_dot_type) {
char * wdata = params->wdata;
const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);
const size_t row_size = ggml_row_size(vec_dot_type, ne10);

assert(params->wsize >= ne11*ne12*ne13*row_size);
assert(src1->type == GGML_TYPE_F32);
Expand All @@ -9721,7 +9715,7 @@ static void ggml_compute_forward_mul_mat(
}

const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);
const size_t row_size = ggml_row_size(vec_dot_type, ne10);

const int64_t nr0 = ne01; // src0 rows
const int64_t nr1 = cne1*ne12*ne13; // src1 rows
Expand Down Expand Up @@ -16326,7 +16320,7 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
} else
#endif
if (node->src[1]->type != vec_dot_type) {
cur = ggml_type_size(vec_dot_type)*ggml_nelements(node->src[1])/ggml_blck_size(vec_dot_type);
cur = ggml_row_size(vec_dot_type, ggml_nelements(node->src[1]));
}
} break;
case GGML_OP_MUL_MAT_ID:
Expand All @@ -16343,7 +16337,7 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
} else
#endif
if (b->type != vec_dot_type) {
cur = ggml_type_size(vec_dot_type)*ggml_nelements(b)/ggml_blck_size(vec_dot_type);
cur = ggml_row_size(vec_dot_type, ggml_nelements(b));
}
} break;
case GGML_OP_OUT_PROD:
Expand Down Expand Up @@ -18703,7 +18697,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
return NULL;
}

const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
const size_t size_cur = ggml_row_size(info->type, ne);

ctx->size += GGML_PAD(size_cur, ctx->alignment);
}
Expand Down
1 change: 0 additions & 1 deletion ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,6 @@ extern "C" {
GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);

GGML_API int ggml_blck_size(enum ggml_type type);
GGML_API size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block
Expand Down
9 changes: 5 additions & 4 deletions tests/test-backend-ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float));
} else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16) {
GGML_ASSERT(size % ggml_blck_size(tensor->type) == 0);
std::vector<uint8_t> dataq(ggml_type_size(tensor->type)*size/ggml_blck_size(tensor->type));
std::vector<uint8_t> dataq(ggml_row_size(tensor->type, size));
int64_t hist[16];
ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size, hist);
ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
Expand All @@ -72,6 +72,8 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {

ggml_type_traits_t tt = ggml_internal_get_type_traits(t->type);
size_t bs = ggml_blck_size(t->type);
std::vector<float> vq(ggml_blck_size(t->type));
bool quantized = ggml_is_quantized(t->type);

// access elements by index to avoid gaps in views
for (int64_t i3 = 0; i3 < t->ne[3]; i3++) {
Expand All @@ -85,9 +87,8 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
tv.push_back(*(float *) &buf[i]);
} else if (t->type == GGML_TYPE_I32) {
tv.push_back((float)*(int32_t *) &buf[i]);
} else if (ggml_is_quantized(t->type)) {
std::vector<float> vq(ggml_blck_size(t->type));
tt.to_float(&buf[i], vq.data(), ggml_blck_size(t->type));
} else if (quantized) {
tt.to_float(&buf[i], vq.data(), bs);
tv.insert(tv.end(), vq.begin(), vq.end());
} else {
GGML_ASSERT(false);
Expand Down
10 changes: 5 additions & 5 deletions tests/test-quantize-perf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ int main(int argc, char * argv[]) {
qfns.from_float_reference(test_data1, test_q1, size);
return test_q1[0];
};
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
size_t quantized_size = ggml_row_size(type, size);
benchmark_function(size, quantized_size, iterations, quantize_fn);
}
printf("\n");
Expand All @@ -300,7 +300,7 @@ int main(int argc, char * argv[]) {
qfns.from_float(test_data1, test_q1, size);
return test_q1[0];
};
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
size_t quantized_size = ggml_row_size(type, size);
benchmark_function(size, quantized_size, iterations, quantize_fn);
}
printf("\n");
Expand All @@ -315,7 +315,7 @@ int main(int argc, char * argv[]) {
qfns.to_float(test_q1, test_out, size);
return test_out[0];
};
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
size_t quantized_size = ggml_row_size(type, size);
benchmark_function(size, quantized_size, iterations, quantize_fn);
}
printf("\n");
Expand All @@ -330,7 +330,7 @@ int main(int argc, char * argv[]) {
vdot.from_float(test_data1, test_q1, size);
return test_q1[0];
};
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
size_t quantized_size = ggml_row_size(type, size);
benchmark_function(size, quantized_size, iterations, quantize_fn);
}
printf("\n");
Expand All @@ -347,7 +347,7 @@ int main(int argc, char * argv[]) {
qfns.vec_dot(size, &result, test_q1, test_q2);
return result;
};
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
size_t quantized_size = ggml_row_size(type, size);
benchmark_function(size, quantized_size, iterations, quantize_fn);
}
printf("\n");
Expand Down