From bf1c8d761ab0cc9862591aa8167d83ea8689ab56 Mon Sep 17 00:00:00 2001 From: Michael Potter Date: Sun, 12 Nov 2023 15:07:24 -0800 Subject: [PATCH 1/7] Update ggml-quants.c --- ggml-quants.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ggml-quants.c b/ggml-quants.c index 740be6dc5c798..42bf749e94e39 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -1273,7 +1273,8 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f float max = x[0]; float sum_w = weights[0]; float sum_x = sum_w * x[0]; - for (int i = 1; i < n; ++i) { + // Mark i as volatile to prevent the -O3 optimizer from unrolling this loop and breaking MacOS Sonoma quantization + for (volatile int i = 1; i < n; ++i) { if (x[i] < min) min = x[i]; if (x[i] > max) max = x[i]; float w = weights[i]; From 7a5e92e7482d570d4f10ae12c28797580e9d6c80 Mon Sep 17 00:00:00 2001 From: Michael Potter Date: Sun, 12 Nov 2023 16:52:45 -0800 Subject: [PATCH 2/7] Update ggml-quants.c Co-authored-by: Jared Van Bortel --- ggml-quants.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ggml-quants.c b/ggml-quants.c index 42bf749e94e39..5e4da4d373efd 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -1273,8 +1273,12 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f float max = x[0]; float sum_w = weights[0]; float sum_x = sum_w * x[0]; - // Mark i as volatile to prevent the -O3 optimizer from unrolling this loop and breaking MacOS Sonoma quantization - for (volatile int i = 1; i < n; ++i) { ++#if defined(__APPLE__) && defined(__clang_major__) && __clang_major__ >= 15 ++ // use 'volatile' to prevent unroll and work around a bug in Apple clang 15.x.x ++ for (volatile int i = 1; i < n; ++i) { ++#else + for (int i = 1; i < n; ++i) { ++#endif if (x[i] < min) min = x[i]; if (x[i] > max) max = x[i]; float w = weights[i]; From 287bc685738ff415d872823103a61cfd56cdc3ef Mon Sep 17 00:00:00 2001 From: Michael Potter Date: Sun, 12 Nov 2023 16:54:53 -0800 Subject: [PATCH 3/7] Update ggml-quants.c --- ggml-quants.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ggml-quants.c b/ggml-quants.c index 5e4da4d373efd..a6445b1540f45 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -1273,12 +1273,12 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f float max = x[0]; float sum_w = weights[0]; float sum_x = sum_w * x[0]; -+#if defined(__APPLE__) && defined(__clang_major__) && __clang_major__ >= 15 -+ // use 'volatile' to prevent unroll and work around a bug in Apple clang 15.x.x -+ for (volatile int i = 1; i < n; ++i) { -+#else - for (int i = 1; i < n; ++i) { -+#endif + #if defined(__APPLE__) && defined(__clang_major__) && __clang_major__ >= 15 + // use 'volatile' to prevent unroll and work around a bug in Apple clang 15.x.x with -O3 flag + for (volatile int i = 1; i < n; ++i) { + #else + for (int i = 1; i < n; ++i) { + #endif if (x[i] < min) min = x[i]; if (x[i] > max) max = x[i]; float w = weights[i]; From 5b0d76f665b6bcfdd4f1444224fcb98f7bed5e56 Mon Sep 17 00:00:00 2001 From: Michael Potter Date: Sun, 12 Nov 2023 18:17:39 -0800 Subject: [PATCH 4/7] increase indentation per 4-spaces rule --- ggml-quants.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ggml-quants.c b/ggml-quants.c index a6445b1540f45..6c2977a48b69d 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -1274,10 +1274,10 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f float sum_w = weights[0]; float sum_x = sum_w * x[0]; #if defined(__APPLE__) && defined(__clang_major__) && __clang_major__ >= 15 - // use 'volatile' to prevent unroll and work around a bug in Apple clang 15.x.x with -O3 flag - for (volatile int i = 1; i < n; ++i) { + // use 'volatile' to prevent unroll and work around a bug in Apple clang 15.x.x with -O3 flag + for (volatile int i = 1; i < n; ++i) { #else - for (int i = 1; i < n; ++i) { + for (int i = 1; i < n; ++i) { #endif if (x[i] < min) min = x[i]; if (x[i] > max) max = x[i]; From 63c950c5b724fd0dfc338692a76d433a83911a07 Mon Sep 17 00:00:00 2001 From: Michael Potter Date: Mon, 13 Nov 2023 09:08:11 -0800 Subject: [PATCH 5/7] Update ggml-quants.c Co-authored-by: Georgi Gerganov --- ggml-quants.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ggml-quants.c b/ggml-quants.c index 6c2977a48b69d..7200450861c7c 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -1273,12 +1273,12 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f float max = x[0]; float sum_w = weights[0]; float sum_x = sum_w * x[0]; - #if defined(__APPLE__) && defined(__clang_major__) && __clang_major__ >= 15 - // use 'volatile' to prevent unroll and work around a bug in Apple clang 15.x.x with -O3 flag - for (volatile int i = 1; i < n; ++i) { - #else - for (int i = 1; i < n; ++i) { - #endif +#if defined(__APPLE__) && defined(__clang_major__) && __clang_major__ >= 15 + // use 'volatile' to prevent unroll and work around a bug in Apple clang 15.x.x with -O3 flag + for (volatile int i = 1; i < n; ++i) { +#else + for (int i = 1; i < n; ++i) { +#endif if (x[i] < min) min = x[i]; if (x[i] > max) max = x[i]; float w = weights[i]; From 7962d0a789a2bfbea52bfbc33d3aacbf1df4d74c Mon Sep 17 00:00:00 2001 From: cebtenzzre Date: Tue, 14 Nov 2023 00:04:01 -0500 Subject: [PATCH 6/7] detect linker version instead of compiler version --- CMakeLists.txt | 9 +++++++++ Makefile | 5 +++++ ggml-quants.c | 4 ++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b4eb18403c0b..d36797f6cf440 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -458,6 +458,15 @@ if (LLAMA_LTO) endif() endif() +# this version of Apple ld64 is buggy +execute_process( + COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v + ERROR_VARIABLE output +) +if (output MATCHES "dyld-1015\.7") + add_compile_definitions(-DBUGGY_APPLE_LINKER) +endif() + # Architecture specific # TODO: probably these flags need to be tweaked on some architectures # feel free to update the Makefile for your architecture and send a pull request or issue diff --git a/Makefile b/Makefile index d6be254a0f362..785060c143487 100644 --- a/Makefile +++ b/Makefile @@ -239,6 +239,11 @@ else endif endif +# this version of Apple ld64 is buggy +ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))' + MK_CPPFLAGS += -DBUGGY_APPLE_LINKER +endif + # OS specific # TODO: support Windows ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)' diff --git a/ggml-quants.c b/ggml-quants.c index 7200450861c7c..8ea22548a1866 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -1273,8 +1273,8 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f float max = x[0]; float sum_w = weights[0]; float sum_x = sum_w * x[0]; -#if defined(__APPLE__) && defined(__clang_major__) && __clang_major__ >= 15 - // use 'volatile' to prevent unroll and work around a bug in Apple clang 15.x.x with -O3 flag +#ifdef BUGGY_APPLE_LINKER + // use 'volatile' to prevent unroll and work around a bug in Apple ld64 1015.7 for (volatile int i = 1; i < n; ++i) { #else for (int i = 1; i < n; ++i) { From 512d9746bfcb6fc5b6ba6532c00584a2c3a9bc88 Mon Sep 17 00:00:00 2001 From: cebtenzzre Date: Tue, 14 Nov 2023 00:08:58 -0500 Subject: [PATCH 7/7] use a more conventional macro name also fix an issue with CMake <3.26 compatibility --- CMakeLists.txt | 2 +- Makefile | 2 +- ggml-quants.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d36797f6cf440..db1f42f1eda6a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -464,7 +464,7 @@ execute_process( ERROR_VARIABLE output ) if (output MATCHES "dyld-1015\.7") - add_compile_definitions(-DBUGGY_APPLE_LINKER) + add_compile_definitions(HAVE_BUGGY_APPLE_LINKER) endif() # Architecture specific diff --git a/Makefile b/Makefile index 785060c143487..36d08811e32b6 100644 --- a/Makefile +++ b/Makefile @@ -241,7 +241,7 @@ endif # this version of Apple ld64 is buggy ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))' - MK_CPPFLAGS += -DBUGGY_APPLE_LINKER + MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER endif # OS specific diff --git a/ggml-quants.c b/ggml-quants.c index 8ea22548a1866..dcfdfb8f4a23f 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -1273,7 +1273,7 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f float max = x[0]; float sum_w = weights[0]; float sum_x = sum_w * x[0]; -#ifdef BUGGY_APPLE_LINKER +#ifdef HAVE_BUGGY_APPLE_LINKER // use 'volatile' to prevent unroll and work around a bug in Apple ld64 1015.7 for (volatile int i = 1; i < n; ++i) { #else