Skip to content

Commit 1cf2850

Browse files
authored
ggml-cuda : increase max graph size (ggml-org#4084)
1 parent 6bb4908 commit 1cf2850

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

ggml-cuda.cu

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@
8888
#define CC_OFFSET_AMD 1000000
8989
#define CC_RDNA2 (CC_OFFSET_AMD + 1030)
9090

91+
#define GGML_CUDA_MAX_NODES 8192
92+
9193
// define this if you want to always fall back to MMQ kernels and not use cuBLAS for matrix multiplication
9294
// on modern hardware, using cuBLAS is recommended as it utilizes F16 tensor cores which are very performant
9395
// for large computational tasks. the drawback is that this requires some extra amount of VRAM:
@@ -7727,7 +7729,7 @@ static void ggml_cuda_alibi(const ggml_tensor * src0, const ggml_tensor * src1,
77277729
ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_alibi);
77287730
}
77297731

7730-
void ggml_cuda_im2col(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
7732+
static void ggml_cuda_im2col(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
77317733
ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_im2col);
77327734
}
77337735

@@ -7842,11 +7844,11 @@ static size_t g_temp_tensor_extra_index = 0;
78427844

78437845
static ggml_tensor_extra_gpu * ggml_cuda_alloc_temp_tensor_extra() {
78447846
if (g_temp_tensor_extras == nullptr) {
7845-
g_temp_tensor_extras = new ggml_tensor_extra_gpu[GGML_DEFAULT_GRAPH_SIZE];
7847+
g_temp_tensor_extras = new ggml_tensor_extra_gpu[GGML_CUDA_MAX_NODES];
78467848
}
78477849

78487850
size_t alloc_index = g_temp_tensor_extra_index;
7849-
g_temp_tensor_extra_index = (g_temp_tensor_extra_index + 1) % GGML_DEFAULT_GRAPH_SIZE;
7851+
g_temp_tensor_extra_index = (g_temp_tensor_extra_index + 1) % GGML_CUDA_MAX_NODES;
78507852
ggml_tensor_extra_gpu * extra = &g_temp_tensor_extras[alloc_index];
78517853
memset(extra, 0, sizeof(*extra));
78527854

@@ -8173,11 +8175,11 @@ struct ggml_backend_buffer_context_cuda {
81738175

81748176
ggml_tensor_extra_gpu * ggml_cuda_alloc_temp_tensor_extra() {
81758177
if (temp_tensor_extras == nullptr) {
8176-
temp_tensor_extras = new ggml_tensor_extra_gpu[GGML_DEFAULT_GRAPH_SIZE];
8178+
temp_tensor_extras = new ggml_tensor_extra_gpu[GGML_CUDA_MAX_NODES];
81778179
}
81788180

81798181
size_t alloc_index = temp_tensor_extra_index;
8180-
temp_tensor_extra_index = (temp_tensor_extra_index + 1) % GGML_DEFAULT_GRAPH_SIZE;
8182+
temp_tensor_extra_index = (temp_tensor_extra_index + 1) % GGML_CUDA_MAX_NODES;
81818183
ggml_tensor_extra_gpu * extra = &temp_tensor_extras[alloc_index];
81828184
memset(extra, 0, sizeof(*extra));
81838185

0 commit comments

Comments
 (0)