Skip to content

ggml-cpu : split arch-specific implementations #13892

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 52 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
70a5b76
move ggml-cpu-aarch64 to repack
xctan May 28, 2025
f46931f
split quantize_row_q8_0/1
xctan May 28, 2025
72c1598
split helper functions
xctan May 28, 2025
914d299
split ggml_vec_dot_q4_0_q8_0
xctan May 28, 2025
5e31cf8
split ggml_vec_dot_q4_1_q8_1
xctan May 28, 2025
b9e7665
split ggml_vec_dot_q5_0_q8_0
xctan May 28, 2025
2b612e7
split ggml_vec_dot_q5_1_q8_1
xctan May 28, 2025
63dbd33
split ggml_vec_dot_q8_0_q8_0
xctan May 28, 2025
347e94f
split ggml_vec_dot_tq1_0_q8_K
xctan May 28, 2025
a277be4
split ggml_vec_dot_tq2_0_q8_K
xctan May 28, 2025
d7d10eb
split ggml_vec_dot_q2_K_q8_K
xctan May 28, 2025
a36ba7a
split ggml_vec_dot_q3_K_q8_K
xctan May 28, 2025
946b821
split ggml_vec_dot_q4_K_q8_K
xctan May 28, 2025
6765056
split ggml_vec_dot_q5_K_q8_K
xctan May 28, 2025
177d68e
split ggml_vec_dot_q6_K_q8_K
xctan May 28, 2025
c55f39f
split ggml_vec_dot_iq2_xxs_q8_K
xctan May 28, 2025
14b7be4
split ggml_vec_dot_iq2_xs_q8_K
xctan May 28, 2025
ff418a0
split ggml_vec_dot_iq2_s_q8_K
xctan May 28, 2025
8a3d48a
split ggml_vec_dot_iq3_xxs_q8_K
xctan May 28, 2025
2394cf4
split ggml_vec_dot_iq3_s_q8_K
xctan May 28, 2025
46c941b
split ggml_vec_dot_iq1_s_q8_K
xctan May 28, 2025
814109e
split ggml_vec_dot_iq1_m_q8_K
xctan May 29, 2025
8c1080c
split ggml_vec_dot_iq4_nl_q8_0
xctan May 29, 2025
0b20c8c
split ggml_vec_dot_iq4_xs_q8_K
xctan May 29, 2025
144151c
fix typos
xctan May 29, 2025
aa81832
fix missing prototypes
xctan May 29, 2025
0b4c6b7
rename ggml-cpu-quants.c
xctan May 29, 2025
3adcf30
rename ggml-cpu-traits
xctan May 29, 2025
62ed22b
rename arm folder
xctan May 29, 2025
df53a19
move cpu-feats-x86.cpp
xctan May 29, 2025
cf21310
rename ggml-cpu-hbm
xctan May 29, 2025
ddade65
update arm detection macro in quants.c
xctan May 29, 2025
540d0dd
move iq quant tables
xctan May 29, 2025
eca1251
split ggml_quantize_mat_q8_0/K
xctan May 29, 2025
d44e6b3
split ggml_gemv_*
xctan May 29, 2025
babbbd6
split ggml_gemm_*
xctan May 29, 2025
cbf5ebf
rename namespace aarch64 to repack
xctan May 29, 2025
c126d24
use weak aliases to replace test macros
xctan May 29, 2025
b2ff353
rename GGML_CPU_AARCH64 to GGML_CPU_REPACK
xctan May 29, 2025
2599c59
rename more aarch64 to repack
xctan May 29, 2025
d83f00a
clean up rebase leftover
xctan May 29, 2025
547d240
fix compilation errors
xctan May 29, 2025
3ac6896
remove trailing spaces
xctan May 29, 2025
097f253
try to fix clang compilation errors
xctan May 29, 2025
5c978b9
try to fix clang compilation errors again
xctan May 29, 2025
cb8bca9
try to fix clang compilation errors, 3rd attempt
xctan May 29, 2025
66b35c8
try to fix clang compilation errors, 4th attempt
xctan May 29, 2025
e156648
try to fix clang compilation errors, 5th attempt
xctan May 29, 2025
a7c9a03
try to fix clang compilation errors, 6th attempt
xctan May 30, 2025
9b91918
try to fix clang compilation errors, 7th attempt
xctan May 30, 2025
2bad20d
try to fix clang compilation errors, 8th attempt
xctan May 30, 2025
87ed654
try to fix clang compilation errors, 9th attempt
xctan May 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ ifdef LLAMA_SERVER_SSL
endif

ifndef GGML_NO_CPU_AARCH64
MK_CPPFLAGS += -DGGML_USE_CPU_AARCH64
MK_CPPFLAGS += -DGGML_USE_CPU_REPACK
endif

# warnings
Expand Down Expand Up @@ -970,7 +970,7 @@ OBJ_GGML = \
$(DIR_GGML)/src/ggml-threading.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu_cpp.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \
$(DIR_GGML)/src/ggml-cpu/repack.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-hbm.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-traits.o \
Expand Down
2 changes: 1 addition & 1 deletion ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ message(DEBUG "GGML_NATIVE_DEFAULT : ${GGML_NATIVE_DEFAULT}")
message(DEBUG "INS_ENB : ${INS_ENB}")

option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
option(GGML_CPU_REPACK "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
option(GGML_SSE42 "ggml: enable SSE 4.2" ${INS_ENB})
option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
Expand Down
46 changes: 34 additions & 12 deletions ggml/src/ggml-cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
list (APPEND GGML_CPU_SOURCES
ggml-cpu/ggml-cpu.c
ggml-cpu/ggml-cpu.cpp
ggml-cpu/ggml-cpu-aarch64.cpp
ggml-cpu/ggml-cpu-aarch64.h
ggml-cpu/ggml-cpu-hbm.cpp
ggml-cpu/ggml-cpu-hbm.h
ggml-cpu/ggml-cpu-quants.c
ggml-cpu/ggml-cpu-quants.h
ggml-cpu/ggml-cpu-traits.cpp
ggml-cpu/ggml-cpu-traits.h
ggml-cpu/repack.cpp
ggml-cpu/repack.h
ggml-cpu/hbm.cpp
ggml-cpu/hbm.h
ggml-cpu/quants.c
ggml-cpu/quants.h
ggml-cpu/traits.cpp
ggml-cpu/traits.h
ggml-cpu/amx/amx.cpp
ggml-cpu/amx/amx.h
ggml-cpu/amx/mmq.cpp
Expand Down Expand Up @@ -84,6 +84,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)

if (GGML_SYSTEM_ARCH STREQUAL "ARM")
message(STATUS "ARM detected")
list(APPEND GGML_CPU_SOURCES
ggml-cpu/arch/arm/quants.c
ggml-cpu/arch/arm/repack.cpp
)

if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
else()
Expand Down Expand Up @@ -167,6 +172,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
endif()
elseif (GGML_SYSTEM_ARCH STREQUAL "x86")
message(STATUS "x86 detected")
list(APPEND GGML_CPU_SOURCES
ggml-cpu/arch/x86/quants.c
ggml-cpu/arch/x86/repack.cpp
)

if (MSVC)
# instruction set detection for MSVC only
if (GGML_NATIVE)
Expand Down Expand Up @@ -302,7 +312,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
# Since multiple variants of the CPU backend may be included in the same
# build, using set_source_files_properties() to set the arch flags is not possible
set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/x86/cpu-feats.cpp)
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
Expand All @@ -311,6 +321,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
endif()
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
message(STATUS "PowerPC detected")
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/powerpc/quants.c)
if (GGML_NATIVE)
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
file(READ "/proc/cpuinfo" POWER10_M)
Expand All @@ -337,6 +348,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
endif()
elseif (GGML_SYSTEM_ARCH STREQUAL "loongarch64")
message(STATUS "loongarch64 detected")
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/loongarch/quants.c)

list(APPEND ARCH_FLAGS -march=loongarch64)
if (GGML_LASX)
list(APPEND ARCH_FLAGS -mlasx)
Expand All @@ -346,6 +359,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
endif()
elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
message(STATUS "riscv64 detected")
list(APPEND GGML_CPU_SOURCES
ggml-cpu/arch/riscv/quants.c
ggml-cpu/arch/riscv/repack.cpp
)
if (GGML_RVV)
if (GGML_XTHEADVECTOR)
list(APPEND ARCH_FLAGS -march=rv64gc_xtheadvector -mabi=lp64d)
Expand All @@ -357,6 +374,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
endif()
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
message(STATUS "s390x detected")
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/s390/quants.c)
file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS})

Expand All @@ -380,12 +398,16 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
if (GGML_VXE)
list(APPEND ARCH_FLAGS -mvx -mzvector)
endif()
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm")
message(STATUS "Wasm detected")
list (APPEND GGML_CPU_SOURCES ggml-cpu/arch/wasm/quants.c)
else()
message(STATUS "Unknown architecture")
message(WARNING "Unknown CPU architecture. Falling back to generic implementations.")
list(APPEND ARCH_FLAGS -DGGML_CPU_GENERIC)
endif()

if (GGML_CPU_AARCH64)
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_AARCH64)
if (GGML_CPU_REPACK)
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_REPACK)
endif()

if (GGML_CPU_KLEIDIAI)
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-cpu/amx/amx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "ggml-backend.h"
#include "ggml-impl.h"
#include "ggml-cpu.h"
#include "ggml-cpu-traits.h"
#include "traits.h"

#if defined(__gnu_linux__)
#include <sys/syscall.h>
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-cpu/amx/mmq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include "mmq.h"
#include "ggml-impl.h"
#include "ggml-cpu-impl.h"
#include "ggml-cpu-quants.h"
#include "quants.h"
#include "ggml-quants.h"
#include <algorithm>
#include <type_traits>
Expand Down
Loading
Loading