Skip to content

Commit 1bdd8ae

Browse files
[CANN] Add Ascend NPU backend (#6035)
* [CANN] Add Ascend NPU backend Ascend is a full-stack AI computing infrastructure for industry applications and services based on Huawei Ascend processors and software. CANN (Compute Architecture of Neural Networks), developed by Huawei, is a heterogeneous computing architecture for AI. Co-authored-by: wangshuai09 <391746016@qq.com> * delete trailing whitespaces * Modify the code based on review comment * Rename LLAMA_CANN to GGML_CANN * Make ggml-common.h private * add ggml_cann prefix for acl funcs * Add logging for CANN backend * Delete Trailing whitespace --------- Co-authored-by: wangshuai09 <391746016@qq.com>
1 parent da3913d commit 1bdd8ae

27 files changed

+10756
-8
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE)
106106
llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
107107
llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
108108
llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
109+
llama_option_depr(WARNING LLAMA_CANN GGML_CANN)
109110

110111
#
111112
# build the library

examples/llama-bench/llama-bench.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@
2323
#include "ggml-cuda.h"
2424
#include "ggml-sycl.h"
2525

26+
#ifdef GGML_USE_CANN
27+
#include "ggml-cann.h"
28+
#endif
29+
2630
// utils
2731
static uint64_t get_time_ns() {
2832
using clock = std::chrono::high_resolution_clock;
@@ -120,6 +124,17 @@ static std::string get_gpu_info() {
120124
id += "/";
121125
}
122126
}
127+
#endif
128+
#ifdef GGML_USE_CANN
129+
uint32_t count = ggml_backend_cann_get_device_count();
130+
for (uint32_t i = 0; i < count; i++) {
131+
char buf[128];
132+
ggml_backend_cann_get_device_description(i, buf, sizeof(buf));
133+
id += buf;
134+
if (i < count - 1) {
135+
id += "/";
136+
}
137+
}
123138
#endif
124139
// TODO: other backends
125140
return id;

examples/llava/clip.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616
#include "ggml-metal.h"
1717
#endif
1818

19+
#ifdef GGML_USE_CANN
20+
#include "ggml-cann.h"
21+
#endif
22+
1923
#define STB_IMAGE_IMPLEMENTATION
2024
#include "stb_image.h"
2125

@@ -1001,6 +1005,11 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
10011005
LOG_TEE("%s: CLIP using Metal backend\n", __func__);
10021006
#endif
10031007

1008+
#ifdef GGML_USE_CANN
1009+
new_clip->backend = ggml_backend_cann_init(0);
1010+
LOG_TEE("%s: CLIP using CANN backend\n", __func__);
1011+
#endif
1012+
10041013

10051014
if (!new_clip->backend) {
10061015
new_clip->backend = ggml_backend_cpu_init();

ggml/include/ggml-cann.h

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
/*
2+
* Copyright (c) 2023-2024 The ggml authors
3+
*
4+
* Permission is hereby granted, free of charge, to any person obtaining a copy
5+
* of this software and associated documentation files (the "Software"), to
6+
* deal in the Software without restriction, including without limitation the
7+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8+
* sell copies of the Software, and to permit persons to whom the Software is
9+
* furnished to do so, subject to the following conditions:
10+
*
11+
* The above copyright notice and this permission notice shall be included in
12+
* all copies or substantial portions of the Software.
13+
*
14+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20+
* IN THE SOFTWARE.
21+
*/
22+
23+
#pragma once
24+
25+
#include "ggml-backend.h"
26+
#include "ggml.h"
27+
28+
#ifdef __cplusplus
29+
extern "C" {
30+
#endif
31+
32+
/**
33+
* @brief Maximum number of CANN devices supported.
34+
*/
35+
#define GGML_CANN_MAX_DEVICES 16
36+
37+
/**
38+
* @brief Initializes the CANN backend for a specified device.
39+
*
40+
* This function initializes the CANN backend for the given device.
41+
* It verifies the device index, allocates a context, and creates a backend
42+
* instance.
43+
*
44+
* @param device The index of the device to initialize.
45+
* @return A pointer to the initialized backend instance, or nullptr on failure.
46+
*/
47+
GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
48+
49+
/**
50+
* @brief Checks if a given backend is a CANN backend.
51+
*
52+
* This function verifies if the provided backend is a CANN backend by comparing
53+
* its GUID with the CANN backend's GUID.
54+
*
55+
* @param backend The backend instance to check.
56+
* @return True if the backend is a CANN backend, false otherwise.
57+
*/
58+
GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
59+
60+
/**
61+
* @brief Retrieves the CANN buffer type for a specified device.
62+
*
63+
* This function initializes and returns the buffer type interface associated
64+
* with the given device. It ensures thread-safe access using a mutex.
65+
*
66+
* @param device The device index for which to retrieve the buffer type.
67+
* @return A pointer to the buffer type interface for the specified device, or
68+
* nullptr if the device index is out of range.
69+
*/
70+
GGML_API GGML_CALL ggml_backend_buffer_type_t
71+
ggml_backend_cann_buffer_type(int32_t device);
72+
73+
/**
74+
* @brief Retrieves the number of CANN devices available.
75+
*
76+
* This function returns the number of CANN devices available based on
77+
* information obtained from `ggml_cann_info()`.
78+
*
79+
* @return The number of CANN devices available.
80+
*/
81+
GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
82+
83+
/**
84+
* @brief Retrieves the description of a specific CANN device.
85+
*
86+
* This function sets the specified device, retrieves the SoC name,
87+
* and writes it into the provided description buffer.
88+
*
89+
* @param device The device index to retrieve the description for.
90+
* @param description Pointer to a buffer where the description will be written.
91+
* @param description_size Size of the description buffer.
92+
*/
93+
GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
94+
int32_t device, char* description, size_t description_size);
95+
96+
/**
97+
* @brief Retrieves the memory information of a specific CANN device.
98+
*
99+
* This function sets the specified device, retrieves the free and total
100+
* memory information of the specified type (ACL_HBM_MEM), and stores them
101+
* in the provided pointers.
102+
*
103+
* @param device The device index to retrieve memory information for.
104+
* @param free Pointer to a variable where the free memory size will be stored.
105+
* @param total Pointer to a variable where the total memory size will be
106+
* stored.
107+
*/
108+
GGML_API GGML_CALL void ggml_backend_cann_get_device_memory(int32_t device,
109+
size_t* free,
110+
size_t* total);
111+
112+
/**
113+
* @brief Set the logging callback for GGML.
114+
*
115+
* This function sets the logging callback and user data for logging.
116+
*
117+
* @param log_callback The logging callback to set.
118+
* @param user_data User data to pass to the logging callback.
119+
*/
120+
GGML_API void ggml_backend_cann_log_set_callback(ggml_log_callback log_callback,
121+
void* user_data);
122+
123+
#ifdef __cplusplus
124+
}
125+
#endif

ggml/include/ggml.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -753,6 +753,8 @@ extern "C" {
753753
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
754754
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
755755

756+
GGML_API bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
757+
756758
// use this to compute the memory overhead of a tensor
757759
GGML_API size_t ggml_tensor_overhead(void);
758760

@@ -2397,6 +2399,7 @@ extern "C" {
23972399
GGML_API int ggml_cpu_has_rpc (void);
23982400
GGML_API int ggml_cpu_has_vsx (void);
23992401
GGML_API int ggml_cpu_has_matmul_int8(void);
2402+
GGML_API int ggml_cpu_has_cann (void);
24002403

24012404
//
24022405
// Internal types and functions exposed for tests and benchmarks

ggml/src/CMakeLists.txt

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -770,6 +770,74 @@ if (GGML_CPU_HBM)
770770
target_link_libraries(ggml PUBLIC memkind)
771771
endif()
772772
773+
if (GGML_CANN)
774+
if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
775+
set(CANN_INSTALL_DIR $ENV{ASCEND_TOOLKIT_HOME})
776+
message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
777+
endif()
778+
779+
if (CANN_INSTALL_DIR)
780+
# Only UNIX-like systems are supported.
781+
if (GGML_CANN)
782+
if (NOT UNIX)
783+
set(GGML_CANN OFF)
784+
message(WARNING "CANN: CANN toolkit supports unix but not ${CMAKE_SYSTEM_NAME}. Turning off GGML_CANN")
785+
endif()
786+
endif()
787+
788+
# Supported platforms: x86-64, arm64
789+
if (GGML_CANN)
790+
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
791+
elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
792+
else()
793+
set(GGML_CANN OFF)
794+
message(WARNING "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}. Turning off GGML_CANN")
795+
endif()
796+
endif()
797+
798+
# Set header and libs
799+
if(GGML_CANN)
800+
set(CANN_INCLUDE_DIRS
801+
${CANN_INSTALL_DIR}/include
802+
${CANN_INSTALL_DIR}/include/aclnn
803+
${CANN_INSTALL_DIR}/acllib/include
804+
)
805+
806+
# TODO: find libs
807+
link_directories(
808+
${CANN_INSTALL_DIR}/lib64
809+
)
810+
811+
add_subdirectory(ggml-cann/kernels)
812+
list(APPEND CANN_LIBRARIES
813+
ascendcl
814+
nnopbase
815+
opapi
816+
acl_op_compiler
817+
ascendc_kernels
818+
)
819+
820+
set(GGML_HEADERS_CANN "../include/ggml-cann.h")
821+
file(GLOB GGML_SOURCES_CANN "ggml-cann/*.cpp")
822+
list(APPEND GGML_SOURCES_CANN "ggml-cann.cpp")
823+
824+
message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
825+
message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")
826+
827+
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${CANN_LIBRARIES} )
828+
set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CANN_INCLUDE_DIRS})
829+
list(APPEND GGML_CDEF_PUBLIC GGML_USE_CANN)
830+
endif()
831+
else()
832+
set(GGML_CANN OFF)
833+
message(WARNING "CANN: Can't find CANN_INSTALL_DIR, do you forget to source set_var.sh. Turning off GGML_CANN")
834+
endif()
835+
836+
if(NOT GGML_CANN)
837+
message(WARNING "CANN: GGML_CANN is turned OFF, see above for details.")
838+
endif()
839+
endif()
840+
773841
function(get_flags CCID CCVER)
774842
set(C_FLAGS "")
775843
set(CXX_FLAGS "")
@@ -1184,6 +1252,7 @@ add_library(ggml
11841252
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
11851253
${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
11861254
${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
1255+
${GGML_SOURCES_CANN} ${GGML_HEADERS_CANN}
11871256
ggml-aarch64.c ggml-aarch64.h
11881257
)
11891258

ggml/src/ggml-backend.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,11 @@ GGML_CALL static void ggml_backend_registry_init(void) {
445445
extern GGML_CALL void ggml_backend_kompute_reg_devices(void);
446446
ggml_backend_kompute_reg_devices();
447447
#endif
448+
449+
#ifdef GGML_USE_CANN
450+
extern GGML_CALL int ggml_backend_cann_reg_devices(void);
451+
ggml_backend_cann_reg_devices();
452+
#endif
448453
}
449454

450455
GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {

0 commit comments

Comments
 (0)