Skip to content

Commit df0c0c7

Browse files
authored
cuda : prevent using split buffers with 3d/4d matrices (#13919)
1 parent b49a8ff commit df0c0c7

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2994,9 +2994,12 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
29942994
{
29952995
struct ggml_tensor * a = op->src[0];
29962996
struct ggml_tensor * b = op->src[1];
2997-
// for small weight matrices the active device can end up without any rows, don't use row split in those cases
2998-
// this avoids some edge cases (and the performance would not be good anyways)
29992997
if (a->buffer && ggml_backend_buft_is_cuda_split(a->buffer->buft)) {
2998+
if (a->ne[2] > 1 || a->ne[3] > 1) {
2999+
return false;
3000+
}
3001+
// for small weight matrices the active device can end up without any rows, don't use row split in those cases
3002+
// this avoids some edge cases (and the performance would not be good anyways)
30003003
ggml_backend_cuda_split_buffer_type_context * buft_ctx = (ggml_backend_cuda_split_buffer_type_context *) a->buffer->buft->context;
30013004
int64_t row_low;
30023005
int64_t row_high;

0 commit comments

Comments
 (0)