Skip to content

Commit 19bbd3b

Browse files
committed
metal: matrix-matrix multiplication kernel
This commit removes the dependency on Metal Performance Shaders (MPS) and uses custom matrix-matrix multiplication kernels for all quantization types. It also adds grouped-query attention (GQA) to support LLaMA 2 70B.
1 parent 25d43e0 commit 19bbd3b

File tree

6 files changed

+496
-634
lines changed

6 files changed

+496
-634
lines changed

CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,6 @@ if (LLAMA_METAL)
298298
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
299299
find_library(METAL_FRAMEWORK Metal REQUIRED)
300300
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
301-
find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED)
302301

303302
set(GGML_SOURCES_METAL ggml-metal.m ggml-metal.h)
304303

@@ -315,7 +314,6 @@ if (LLAMA_METAL)
315314
${FOUNDATION_LIBRARY}
316315
${METAL_FRAMEWORK}
317316
${METALKIT_FRAMEWORK}
318-
${METALPERFORMANCE_FRAMEWORK}
319317
)
320318
endif()
321319

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ endif # LLAMA_CLBLAST
283283
ifdef LLAMA_METAL
284284
CFLAGS += -DGGML_USE_METAL -DGGML_METAL_NDEBUG
285285
CXXFLAGS += -DGGML_USE_METAL
286-
LDFLAGS += -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
286+
LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
287287
OBJS += ggml-metal.o
288288
endif # LLAMA_METAL
289289

flake.nix

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@
1414
with pkgs.darwin.apple_sdk_11_0.frameworks; [
1515
Accelerate
1616
MetalKit
17-
MetalPerformanceShaders
18-
MetalPerformanceShadersGraph
1917
]
2018
else if isAarch32 && isDarwin then
2119
with pkgs.darwin.apple_sdk.frameworks; [

0 commit comments

Comments
 (0)