1 file changed, +3 −3 lines changed

@@ -1002,7 +1002,7 @@ static void llama_model_load_internal(
 }
 
 #ifdef GGML_USE_CUBLAS
-#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_CUDA
+#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU
 #else
 #define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_CPU
 #endif
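LLAMA_BACKEND_OFFLOAD names the backend assigned to layers that get offloaded: GGML_BACKEND_GPU in a cuBLAS build, GGML_BACKEND_CPU otherwise. A minimal sketch of how such a macro is typically consumed when choosing a per-layer backend; the n_gpu_layers parameter and i_gpu_start cutoff are assumptions for illustration, not taken from this diff:

    // Hypothetical sketch: offload the last n_gpu_layers layers, keep the rest on the host.
    const int i_gpu_start = n_layer - n_gpu_layers;  // assumed cutoff, not shown in this diff
    for (uint32_t i = 0; i < n_layer; ++i) {
        // Layers below the cutoff stay on the CPU; the rest use the offload backend,
        // which resolves to GGML_BACKEND_GPU only when GGML_USE_CUBLAS is defined.
        const ggml_backend backend = int(i) < i_gpu_start ? GGML_BACKEND_CPU : LLAMA_BACKEND_OFFLOAD;
        // ... the layer's tensors are then requested from the loader with this backend ...
    }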
@@ -1054,7 +1054,7 @@ static void llama_model_load_internal(
             layer.w2 = ml->get_tensor(layers_i + ".feed_forward.w2.weight", {n_ff, n_embd}, backend);
             layer.w3 = ml->get_tensor(layers_i + ".feed_forward.w3.weight", {n_embd, n_ff}, backend);
 
-            if (backend == GGML_BACKEND_CUDA) {
+            if (backend == GGML_BACKEND_GPU) {
                 vram_total +=
                     ggml_nbytes(layer.attention_norm) + ggml_nbytes(layer.wq) + ggml_nbytes(layer.wk) +
                     ggml_nbytes(layer.wv) + ggml_nbytes(layer.wo) + ggml_nbytes(layer.attention_norm) +
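Apart from the renamed enum value, the logic here is unchanged: when a layer's tensors are assigned to the GPU backend, the loader adds their ggml_nbytes() sizes to vram_total so it can track how much VRAM the offloaded weights will occupy. As a rough illustration of what one of those per-tensor byte counts amounts to for a plain unquantized matrix (the helper below is made up for illustration, not ggml's API):

    // Illustrative only: an unquantized rows x cols weight stored as 16-bit floats
    // occupies rows * cols * sizeof(element) bytes; ggml_nbytes() generalizes this
    // to block-quantized types by scaling with the type's block size.
    static size_t dense_tensor_nbytes(int64_t n_rows, int64_t n_cols, size_t elem_size) {
        return (size_t)(n_rows * n_cols) * elem_size;  // e.g. 4096 * 11008 * 2 bytes ≈ 86 MiB for an f16 feed-forward weight
    }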
@@ -1115,7 +1115,7 @@ static void llama_model_load_internal(
         }
     }
     for (llama_load_tensor & lt : ml->tensors_map.tensors) {
-        if (lt.ggml_tensor->backend != GGML_BACKEND_CUDA) {
+        if (lt.ggml_tensor->backend != GGML_BACKEND_GPU) {
             continue;
         }
         if (progress_callback) {
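For context, lt.ggml_tensor->backend carries one of the ggml_backend tags, so this loop skips every tensor that stays on the CPU and only routes GPU-tagged tensors through the upload path that follows. A rough sketch of the enum after the rename; the numeric values are illustrative and GGML_BACKEND_GPU_SPLIT is assumed from the ggml headers of that period rather than shown in this diff:

    // Rough sketch of ggml's backend tag after the rename (values illustrative):
    enum ggml_backend {
        GGML_BACKEND_CPU = 0,    // tensor data stays in host memory
        GGML_BACKEND_GPU,        // tensor is offloaded to a single GPU
        GGML_BACKEND_GPU_SPLIT,  // assumed: tensor rows are split across multiple GPUs
    };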