Skip to content

Commit 5b01aa6

Browse files
committed
add ascend kernel compile struct
1 parent 652b0f7 commit 5b01aa6

11 files changed

+384
-44
lines changed

CMakeLists.txt

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -822,20 +822,37 @@ if (LLAMA_CANN)
822822
endif()
823823
endif()
824824
825-
# Set headers
826-
set(CANN_INCLUDE_DIRS "${CANN_INSTALL_DIR}/include" "${CANN_INSTALL_DIR}/include/aclnn")
825+
# Set header and libs
826+
if(LLAMA_CANN)
827+
set(CANN_INCLUDE_DIRS
828+
${CANN_INSTALL_DIR}/include
829+
${CANN_INSTALL_DIR}/include/aclnn
830+
${CANN_INSTALL_DIR}/acllib/include
831+
)
832+
833+
# TODO: find libs
834+
link_directories(
835+
${CANN_INSTALL_DIR}/lib64
836+
${CANN_INSTALL_DIR}/aarch64-linux/simulator/Ascend910B1/lib
837+
${CANN_INSTALL_DIR}/../8.0.RC1.alpha003/tools/tikicpulib/lib/Ascend910B1)
827838
828-
# Set libs
829-
if (LLAMA_CANN)
830-
# Build Ascendc kernels.
831839
add_subdirectory(ggml-cann/kernels)
832-
list(APPEND CANN_LIBRARIES ascendcl nnopbase opapi acl_op_compiler ascendc_kernels)
833-
LINK_DIRECTORIES(${LINK_DIRECTORIES} ${CANN_INSTALL_DIR}/lib64)
840+
list(APPEND CANN_LIBRARIES
841+
ascendcl
842+
nnopbase
843+
opapi
844+
acl_op_compiler
845+
cann_kernels
846+
)
834847
835848
set(GGML_HEADERS_CANN ggml-cann.h)
836849
file(GLOB GGML_SOURCES_CUDA "ggml-cann/*.cpp")
837850
list(APPEND GGML_SOURCES_CANN "ggml-cann.cpp")
838-
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${CANN_LIBRARIES})
851+
852+
message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
853+
message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")
854+
855+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${CANN_LIBRARIES} )
839856
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CANN_INCLUDE_DIRS})
840857
add_compile_definitions(GGML_USE_CANN)
841858
endif()
@@ -1218,7 +1235,6 @@ add_library(ggml OBJECT
12181235
ggml-backend.h
12191236
ggml-quants.c
12201237
ggml-quants.h
1221-
<<<<<<< HEAD
12221238
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
12231239
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
12241240
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
@@ -1229,17 +1245,6 @@ add_library(ggml OBJECT
12291245
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
12301246
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
12311247
${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
1232-
=======
1233-
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
1234-
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
1235-
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
1236-
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
1237-
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
1238-
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
1239-
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
1240-
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
1241-
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
1242-
>>>>>>> a21434a136a034b6c64bd50b17442e36f3d7e3c8
12431248
${GGML_SOURCES_CANN} ${GGML_HEADERS_CANN}
12441249
)
12451250

ggml-cann.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,8 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
431431
ggml_cann_clamp(ctx, dst);
432432
break;
433433
case GGML_OP_CPY:
434-
return false;
434+
ggml_cann_cpy(ctx, dst);
435+
break;
435436
case GGML_OP_CONT:
436437
ggml_cann_dup(ctx, dst);
437438
break;
@@ -664,8 +665,8 @@ GGML_CALL static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
664665
case GGML_OP_MUL_MAT_ID:
665666
// embedding
666667
case GGML_OP_GET_ROWS:
667-
case GGML_OP_CPY:
668668
return false;
669+
case GGML_OP_CPY:
669670
case GGML_OP_DUP:
670671
case GGML_OP_REPEAT:
671672
case GGML_OP_CONCAT:
@@ -846,7 +847,7 @@ extern "C" GGML_CALL int ggml_backend_cann_reg_devices();
846847

847848
GGML_CALL int ggml_backend_cann_reg_devices() {
848849
ACL_CHECK(aclInit(nullptr));
849-
uint32_t device_count = ggml_backend_cann_get_device_count();
850+
uint32_t device_count = 1;//= ggml_backend_cann_get_device_count();
850851
// initialization
851852
for (uint32_t i = 0; i < device_count; i++) {
852853
char name[128];

ggml-cann/aclnn_ops.cpp

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -766,12 +766,7 @@ void ggml_cann_max_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
766766
ACL_CHECK(aclDestroyIntArray(dilations));
767767
}
768768

769-
void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
770-
ggml_tensor* src = dst->src[0];
771-
772-
aclTensor* acl_src = create_acl_tensor(src);
773-
aclTensor* acl_dst = create_acl_tensor(dst);
774-
769+
void cann_copy(ggml_backend_cann_context& ctx, ggml_tensor* dst, aclTensor* acl_src, aclTensor* acl_dst) {
775770
uint64_t workspaceSize = 0;
776771
aclOpExecutor* executor;
777772
void* workspaceAddr = nullptr;
@@ -786,6 +781,16 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
786781
aclrtStream stream = ctx.stream();
787782
ACL_CHECK(aclnnInplaceCopy(workspaceAddr, workspaceSize, executor, stream));
788783

784+
}
785+
786+
void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
787+
ggml_tensor* src = dst->src[0];
788+
789+
aclTensor* acl_src = create_acl_tensor(src);
790+
aclTensor* acl_dst = create_acl_tensor(dst);
791+
792+
cann_copy(ctx, dst, acl_src, acl_dst);
793+
789794
ACL_CHECK(aclDestroyTensor(acl_src));
790795
ACL_CHECK(aclDestroyTensor(acl_dst));
791796
}
@@ -1580,5 +1585,23 @@ void ggml_cann_alibi(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
15801585
ACL_CHECK(aclDestroyTensor(tmp_mk_tensor));
15811586
ACL_CHECK(aclDestroyTensor(tmp_arange3_tensor));
15821587
ACL_CHECK(aclDestroyTensor(tmp_output_tensor));
1583-
>>>>>>> a21434a136a034b6c64bd50b17442e36f3d7e3c8
1588+
}
1589+
1590+
void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
1591+
ggml_tensor* src = dst->src[0];
1592+
1593+
aclTensor* acl_src = create_acl_tensor(src);
1594+
aclTensor* acl_dst = create_acl_tensor(dst);
1595+
1596+
if(!ggml_is_quantized(dst->type)) {
1597+
cann_copy(ctx, dst, acl_src, acl_dst);
1598+
} else {
1599+
uint8_t* size = (uint8_t*)ctx.alloc_buffer(dst, sizeof(size_t));
1600+
size_t ne = ggml_nelements(src);
1601+
aclrtMemcpy(size, sizeof(size_t), &ne, sizeof(size_t), ACL_MEMCPY_HOST_TO_DEVICE);
1602+
size_t ne1;
1603+
aclrtMemcpy(&ne1, sizeof(size_t), size, sizeof(size_t), ACL_MEMCPY_DEVICE_TO_HOST);
1604+
1605+
cann_quantize_q4_0(1, nullptr, (uint8_t*)src->data, (uint8_t*)dst->data, size);
1606+
}
15841607
}

ggml-cann/aclnn_ops.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* d
6666

6767
void ggml_cann_alibi(ggml_backend_cann_context& ctx, ggml_tensor* dst);
6868

69+
void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
70+
6971
template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
7072
aclTensor*, uint64_t*, aclOpExecutor**),
7173
aclnnStatus execute(void*, uint64_t, aclOpExecutor*, aclrtStream)>

ggml-cann/kernels/CMakeLists.txt

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,46 @@
1-
if(NOT SOC_VERSION)
2-
set(SOC_VERSION "ascend910b3")
1+
if (NOT SOC_TYPE)
2+
set (SOC_TYPE "Ascend910B3")
33
endif()
4-
set(ASCEND_CANN_PACKAGE_PATH ${CANN_INSTALL_DIR})
5-
set(RUN_MODE "npu" CACHE STRING "run mode: npu/sim/cpu")
64

7-
if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
8-
set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
9-
elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake)
10-
set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake)
5+
file(GLOB SRC_FILES
6+
dequantize_q4_0.cpp
7+
quantize_q4_0.cpp
8+
)
9+
10+
string(TOLOWER "${CMAKE_BUILD_TYPE}" lowercase_CMAKE_BUILD_TYPE)
11+
if(${lowercase_CMAKE_BUILD_TYPE} STREQUAL "debug")
12+
if (NOT DEFINED ENV{CMAKE_PREFIX_PATH})
13+
set(CMAKE_PREFIX_PATH ${CANN_INSTALL_DIR}/tools/tikicpulib/lib/cmake)
14+
endif()
15+
16+
find_package(tikicpulib REQUIRED)
17+
add_library(cann_kernels ${SRC_FILES} ascendc_kernels.cpp)
18+
target_link_libraries(cann_kernels PRIVATE
19+
ascendcl
20+
tikicpulib::ascend910B1
21+
)
22+
23+
target_compile_features(cann_kernels PRIVATE cxx_std_17)
24+
1125
else()
12-
message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the compiler package is installed.")
13-
endif()
26+
string(TOLOWER ${SOC_TYPE} SOC_VERSION)
27+
set(ASCEND_CANN_PACKAGE_PATH ${CANN_INSTALL_DIR})
28+
set(RUN_MODE "npu" CACHE STRING "run mode: npu/sim")
1429

15-
include(${ASCENDC_CMAKE_DIR}/ascendc.cmake)
30+
if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
31+
set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
32+
elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake)
33+
set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake)
34+
else()
35+
message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the compiler package is installed.")
36+
endif()
37+
include(${ASCENDC_CMAKE_DIR}/ascendc.cmake)
1638

17-
ascendc_library(ascendc_kernels STATIC
18-
threshold_opencv_kernel.cpp
19-
)
39+
ascendc_library(ascendc_kernels STATIC
40+
${SRC_FILES}
41+
)
42+
#ascendc_compile_definitions(ascendc_kernels PRIVATE -DASCENDC_DUMP)
43+
44+
add_library(cann_kernels STATIC ascendc_kernels.cpp)
45+
target_link_libraries(cann_kernels PUBLIC ascendc_kernels)
46+
endif()

ggml-cann/kernels/ascendc_kernels.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#include "ascendc_kernels.h"
2+
3+
#ifdef __CCE_KT_TEST__
4+
#include "tikicpulib.h"
5+
#else
6+
#include "aclrtlaunch_ascendc_dequantize_q4_0.h"
7+
#include "aclrtlaunch_ascendc_quantize_q4_0.h"
8+
#endif
9+
10+
11+
#ifdef __CCE_KT_TEST__
12+
#include <acl/acl.h>
13+
14+
uint8_t* to_gm(uint8_t* ptr, size_t size) {
15+
uint8_t* gm = (uint8_t*)AscendC::GmAlloc(size);
16+
aclrtMemcpy(gm, size, ptr, size, ACL_MEMCPY_DEVICE_TO_HOST);
17+
return gm;
18+
}
19+
20+
void free_gm(uint8_t* ptr) {
21+
aclrtFree(ptr);
22+
}
23+
24+
extern "C" __global__ __aicore__ void ascendc_dequantize_q4_0(GM_ADDR x, GM_ADDR y, GM_ADDR size);
25+
extern "C" __global__ __aicore__ void ascendc_quantize_q4_0(GM_ADDR x, GM_ADDR y, GM_ADDR size);
26+
#endif
27+
28+
void cann_dequantize_q4_0(uint32_t block_dim, void* stream, uint8_t* x, uint8_t* y, uint8_t* size) {
29+
#ifdef __CCE_KT_TEST__
30+
uint8_t* size_host = to_gm(size, sizeof(size_t));
31+
uint8_t* x_host = to_gm(x, *((size_t*)size_host));
32+
uint8_t* y_host = to_gm(y, *((size_t*)size_host));
33+
AscendC::SetKernelMode(KernelMode::AIV_MODE);
34+
ICPU_RUN_KF(ascendc_dequantize_q4_0, 1, x_host, y_host, size_host);
35+
free_gm(size_host);
36+
free_gm(x_host);
37+
free_gm(y_host);
38+
#else
39+
aclrtlaunch_ascendc_dequantize_q4_0(block_dim, stream, x, y, size);
40+
#endif
41+
}
42+
43+
void cann_quantize_q4_0(uint32_t block_dim, void* stream, uint8_t* x, uint8_t* y, uint8_t* size) {
44+
#ifdef __CCE_KT_TEST__
45+
uint8_t* size_host = to_gm(size, sizeof(size_t));
46+
uint8_t* x_host = to_gm(x, *((size_t*)size_host));
47+
uint8_t* y_host = to_gm(y, *((size_t*)size_host));
48+
AscendC::SetKernelMode(KernelMode::AIV_MODE);
49+
ICPU_RUN_KF(ascendc_quantize_q4_0, 1, x_host, y_host, size_host);
50+
free_gm(size_host);
51+
free_gm(x_host);
52+
free_gm(y_host);
53+
#else
54+
aclrtlaunch_ascendc_quantize_q4_0(block_dim, stream, x, y, size);
55+
#endif
56+
}

ggml-cann/kernels/ascendc_kernels.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
11
#ifndef ASCENDC_KERNELS_H
22
#define ASCENDC_KERNELS_H
33

4+
5+
#include <stdint.h>
6+
7+
void cann_dequantize_q4_0(uint32_t block_dim, void* stream, uint8_t* x, uint8_t* y, uint8_t* size);
8+
void cann_quantize_q4_0(uint32_t block_dim, void* stream, uint8_t* x, uint8_t* y, uint8_t* size);
9+
410
#endif //ASCENDC_KERNELS_H

0 commit comments

Comments
 (0)