From ebc8707a3cdbd471b81275207fe38bba9a9c3784 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 23 Apr 2025 18:48:05 +0100 Subject: [PATCH 01/34] Add base of flat bindings - to expand --- CMakeLists.txt | 3 +++ examples/bench/bench.cpp | 7 +++++++ examples/cli/cli.cpp | 7 +++++++ examples/stream/stream.cpp | 7 +++++++ ggml/src/CMakeLists.txt | 16 +++++++++++++++- ggml/src/ggml-flat.cpp | 35 +++++++++++++++++++++++++++++++++++ ggml/src/ggml-flat.h | 28 ++++++++++++++++++++++++++++ src/CMakeLists.txt | 20 +++++++++++++++++--- src/whisper-flat.cpp | 34 ++++++++++++++++++++++++++++++++++ src/whisper-flat.h | 26 ++++++++++++++++++++++++++ 10 files changed, 179 insertions(+), 4 deletions(-) create mode 100644 ggml/src/ggml-flat.cpp create mode 100644 ggml/src/ggml-flat.h create mode 100644 src/whisper-flat.cpp create mode 100644 src/whisper-flat.h diff --git a/CMakeLists.txt b/CMakeLists.txt index be6db903c4a..2d0bfe46492 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,6 +70,9 @@ option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF) option(WHISPER_USE_SYSTEM_GGML "whisper: use system-installed GGML library" OFF) +# flat bindings +option(BINDINGS_FLAT "Add extra flat definitions to Whisper + GGML" OFF) + # sanitizers option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF) option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index 54f73110d42..dbe527c86fb 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -1,4 +1,7 @@ #include "whisper.h" +#if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) +#include "whisper-flat.h" +#endif #include #include @@ -61,6 +64,10 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para static int whisper_bench_full(const whisper_params & params) { // whisper init + #if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) + whisper_flat_backend_load_all(); + #endif + struct whisper_context_params cparams = whisper_context_default_params(); cparams.use_gpu = params.use_gpu; diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp index 4b2b3521b80..07ffcac93c2 100644 --- a/examples/cli/cli.cpp +++ b/examples/cli/cli.cpp @@ -2,6 +2,9 @@ #include "common-whisper.h" #include "whisper.h" +#if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) +#include "whisper-flat.h" +#endif #include "grammar-parser.h" #include @@ -1004,6 +1007,10 @@ int main(int argc, char ** argv) { // whisper init + #if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) + whisper_flat_backend_load_all(); + #endif + struct whisper_context_params cparams = whisper_context_default_params(); cparams.use_gpu = params.use_gpu; diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index 65c6587db92..5dc0c2eee20 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -6,6 +6,9 @@ #include "common.h" #include "common-whisper.h" #include "whisper.h" +#if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) +#include "whisper-flat.h" +#endif #include #include @@ -155,6 +158,10 @@ int main(int argc, char ** argv) { exit(0); } + #if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) + whisper_flat_backend_load_all(); + #endif + struct whisper_context_params cparams = whisper_context_default_params(); cparams.use_gpu = params.use_gpu; diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index f00700da71f..d7c3d3885b9 100644 
--- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -208,8 +208,22 @@ if (GGML_BACKEND_DL) target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL) endif() +set(GGML_LIBRARY_SOURCES + ggml-backend-reg.cpp) + +if(BINDINGS_FLAT) + message(STATUS "Adding FLAT GGML binding extras") + + set(FLAT_GGML_SOURCES + ggml-flat.cpp + ) + + list(APPEND GGML_LIBRARY_SOURCES ${FLAT_GGML_SOURCES}) +endif() + add_library(ggml - ggml-backend-reg.cpp) + ${GGML_LIBRARY_SOURCES} + ) target_link_libraries(ggml PUBLIC ggml-base) diff --git a/ggml/src/ggml-flat.cpp b/ggml/src/ggml-flat.cpp new file mode 100644 index 00000000000..ea727ecbec9 --- /dev/null +++ b/ggml/src/ggml-flat.cpp @@ -0,0 +1,35 @@ + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#ifndef NOMINMAX +# define NOMINMAX +#endif +#include +#endif + +#include "ggml-backend.h" +#include "ggml-backend-impl.h" +#include "ggml-alloc.h" +#include "ggml-impl.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __APPLE__ +#include +#include +#endif + +#include "ggml-flat.h" + + + + + diff --git a/ggml/src/ggml-flat.h b/ggml/src/ggml-flat.h new file mode 100644 index 00000000000..48f13068d9d --- /dev/null +++ b/ggml/src/ggml-flat.h @@ -0,0 +1,28 @@ +#pragma once + +#ifdef BINDINGS_FLAT +#endif + +#ifdef GGML_SHARED +# if defined(_WIN32) && !defined(__MINGW32__) +# ifdef GGML_BUILD +# define GGML_API __declspec(dllexport) extern +# else +# define GGML_API __declspec(dllimport) extern +# endif +# else +# define GGML_API __attribute__ ((visibility ("default"))) extern +# endif +#else +# define GGML_API extern +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + + +#ifdef __cplusplus +} +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a091e66a25f..5f90e81a3ac 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -100,10 +100,24 @@ endif() # whisper +set(WHISPER_LIBRARY_SOURCES + ../include/whisper.h + whisper-arch.h + whisper.cpp + ) + +if(BINDINGS_FLAT) + message(STATUS "Adding FLAT Whisper binding extras") + + set(FLAT_WHISPER_SOURCES + whisper-flat.cpp + ) + + list(APPEND WHISPER_LIBRARY_SOURCES ${FLAT_WHISPER_SOURCES}) +endif() + add_library(whisper - ../include/whisper.h - whisper-arch.h - whisper.cpp + ${WHISPER_LIBRARY_SOURCES} ) # Set the version numbers diff --git a/src/whisper-flat.cpp b/src/whisper-flat.cpp new file mode 100644 index 00000000000..0fc5eedf649 --- /dev/null +++ b/src/whisper-flat.cpp @@ -0,0 +1,34 @@ +#include "whisper.h" +#include "whisper-arch.h" + +#include "ggml.h" +#include "ggml-cpp.h" +#include "ggml-alloc.h" +#include "ggml-backend.h" + +#include +#include +#include +#define _USE_MATH_DEFINES +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "whisper-flat.h" + +void whisper_flat_backend_load_all(void) { + ggml_backend_load_all(); +} diff --git a/src/whisper-flat.h b/src/whisper-flat.h new file mode 100644 index 00000000000..0f16a909b9f --- /dev/null +++ b/src/whisper-flat.h @@ -0,0 +1,26 @@ +#pragma once + +#ifdef WHISPER_SHARED +# ifdef _WIN32 +# ifdef WHISPER_BUILD +# define WHISPER_API __declspec(dllexport) +# else +# define WHISPER_API __declspec(dllimport) +# endif +# else +# define WHISPER_API __attribute__ ((visibility ("default"))) +# endif +#else +# define WHISPER_API +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + WHISPER_API void whisper_flat_backend_load_all(void); + +#ifdef __cplusplus +} +#endif + 
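[Usage sketch, not part of the patch series] With a build configured with -DBINDINGS_FLAT=ON (and GGML_BACKEND_DL for dynamically loaded backends), a binding is expected to call the flat entry point from PATCH 01 before creating a context. Only whisper_flat_backend_load_all() comes from the patch above; the remaining calls are the existing public whisper.h API, and "model.bin" is a placeholder model path.

#include "whisper.h"
#include "whisper-flat.h"

int main(void) {
    // load the available ggml backends before any whisper context is created
    whisper_flat_backend_load_all();

    struct whisper_context_params cparams = whisper_context_default_params();
    struct whisper_context * ctx = whisper_init_from_file_with_params("model.bin", cparams);
    if (ctx == NULL) {
        return 1;
    }

    // ... run whisper_full() and read segments here ...

    whisper_free(ctx);
    return 0;
}
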
From 2527199652a696e644ac6f299f7b43f0b49d6b48 Mon Sep 17 00:00:00 2001 From: peardox Date: Thu, 24 Apr 2025 10:07:45 +0100 Subject: [PATCH 02/34] Add and expose ggml_backend_try_load_best --- ggml/include/ggml-backend.h | 2 ++ ggml/src/ggml-backend-reg.cpp | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index 64671495b38..b96a1f8579d 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -348,6 +348,8 @@ extern "C" { // CPU buffer types are always available GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void); + + GGML_API ggml_backend_reg_t ggml_backend_try_load_best(const char * name, const char * dir_path); #ifdef __cplusplus } diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp index 405d8e31514..40ba454b695 100644 --- a/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp @@ -584,3 +584,13 @@ void ggml_backend_load_all_from_path(const char * dir_path) { ggml_backend_load(backend_path); } } + +ggml_backend_reg_t ggml_backend_try_load_best(const char * name, const char * dir_path) { +#ifdef NDEBUG + bool silent = true; +#else + bool silent = false; +#endif + + return ggml_backend_load_best(name, silent, dir_path); +} From e5184ae9188fbe4497c40e2fd7a9a16bf2db3e7a Mon Sep 17 00:00:00 2001 From: peardox Date: Thu, 24 Apr 2025 12:22:32 +0100 Subject: [PATCH 03/34] Add back previous closed PR code --- ggml/src/ggml-flat.h | 8 ++--- include/whisper.h | 6 ++++ src/whisper-flat.cpp | 12 +++++++ src/whisper-flat.h | 15 ++++++--- src/whisper.cpp | 74 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+), 9 deletions(-) diff --git a/ggml/src/ggml-flat.h b/ggml/src/ggml-flat.h index 48f13068d9d..7537d7249f2 100644 --- a/ggml/src/ggml-flat.h +++ b/ggml/src/ggml-flat.h @@ -6,15 +6,15 @@ #ifdef GGML_SHARED # if defined(_WIN32) && !defined(__MINGW32__) # ifdef GGML_BUILD -# define GGML_API __declspec(dllexport) extern +# define GGML_FLAT_API __declspec(dllexport) extern # else -# define GGML_API __declspec(dllimport) extern +# define GGML_FLAT_API __declspec(dllimport) extern # endif # else -# define GGML_API __attribute__ ((visibility ("default"))) extern +# define GGML_FLAT_API __attribute__ ((visibility ("default"))) extern # endif #else -# define GGML_API extern +# define GGML_FLAT_API extern #endif #ifdef __cplusplus diff --git a/include/whisper.h b/include/whisper.h index 1e1375033ad..9639d69019a 100644 --- a/include/whisper.h +++ b/include/whisper.h @@ -668,6 +668,12 @@ extern "C" { // Get the no_speech probability for the specified segment WHISPER_API float whisper_full_get_segment_no_speech_prob (struct whisper_context * ctx, int i_segment); WHISPER_API float whisper_full_get_segment_no_speech_prob_from_state(struct whisper_state * state, int i_segment); + + // For whisper-flat.cpp to expose + const char * whisper_get_system_info_json(void); + struct whisper_state * whisper_get_state_from_context(struct whisper_context * ctx); + struct whisper_timings * whisper_get_timings_with_state(struct whisper_state * state); + #ifdef __cplusplus } #endif diff --git a/src/whisper-flat.cpp b/src/whisper-flat.cpp index 0fc5eedf649..249eb01d5fd 100644 --- a/src/whisper-flat.cpp +++ b/src/whisper-flat.cpp @@ -32,3 +32,15 @@ void whisper_flat_backend_load_all(void) { ggml_backend_load_all(); } + +const char * whisper_flat_get_system_info_json(void) 
{ + return whisper_get_system_info_json(); +} + +struct whisper_state * whisper_flat_get_state_from_context(struct whisper_context * ctx) { + return whisper_get_state_from_context(ctx); +} + +struct whisper_timings * whisper_flat_get_timings_with_state(struct whisper_state * state) { + return whisper_get_timings_with_state(state); +} \ No newline at end of file diff --git a/src/whisper-flat.h b/src/whisper-flat.h index 0f16a909b9f..55976acb59a 100644 --- a/src/whisper-flat.h +++ b/src/whisper-flat.h @@ -3,22 +3,27 @@ #ifdef WHISPER_SHARED # ifdef _WIN32 # ifdef WHISPER_BUILD -# define WHISPER_API __declspec(dllexport) +# define WHISPER_FLAT_API __declspec(dllexport) # else -# define WHISPER_API __declspec(dllimport) +# define WHISPER_FLAT_API __declspec(dllimport) # endif # else -# define WHISPER_API __attribute__ ((visibility ("default"))) +# define WHISPER_FLAT_API __attribute__ ((visibility ("default"))) # endif #else -# define WHISPER_API +# define WHISPER_FLAT_API #endif +#include "whisper.h" + #ifdef __cplusplus extern "C" { #endif - WHISPER_API void whisper_flat_backend_load_all(void); + WHISPER_FLAT_API void whisper_flat_backend_load_all(void); + WHISPER_FLAT_API struct whisper_timings * whisper_flat_get_timings_with_state(struct whisper_state * state); + WHISPER_FLAT_API struct whisper_state * whisper_flat_get_state_from_context(struct whisper_context * ctx); + WHISPER_FLAT_API const char * whisper_flat_get_system_info_json(void); #ifdef __cplusplus } diff --git a/src/whisper.cpp b/src/whisper.cpp index 2c83f7bab3b..94e2d0b5c7a 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -7550,3 +7550,77 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text fputs(text, stderr); fflush(stderr); } + +// whisper_get_system_info_json +// Returns system info as json, useful for language bindings +// NOTE : While testing features->value always returned an int. +// Even though ints are invariably returned they may be +// some values that return other types. +// This function returns everything quoted (i.e. as a string) +// and leaves type-casting to the caller. +// This also removes the unlikely but plausible state of +// a string being returned unquoted (thus invalidating JSON) + +const char * whisper_get_system_info_json(void) { + static std::string s; + + s = "{"; + s += "\"WHISPER\":{"; + s += "\"COREML\":\"" + std::to_string(whisper_has_coreml()) + "\","; + s += "\"OPENVINO\":\"" + std::to_string(whisper_has_openvino()) + "\"}"; + + for (size_t i = 0; i < ggml_backend_reg_count(); i++) { + auto * reg = ggml_backend_reg_get(i); + auto * get_features_fn = (ggml_backend_get_features_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features"); + if (get_features_fn) { + ggml_backend_feature * features = get_features_fn(reg); + s += ",\""; + s += ggml_backend_reg_name(reg); + s += "\":{"; + auto first = true; + for (; features->name; features++) { + if(first) { + first = false; + } else { + s += ","; + } + s += "\""; + s += features->name; + s += "\":\""; + s += features->value; + s += "\""; + } + s += "}"; + } + } + s += "}"; + + return s.c_str(); +} + +// whisper_get_state_from_context +// Returns state from supplied context pointer +// This is mainly a helper for non-C++ language bindings as whisper_context +// has embedded C++ specific types (e.g. 
maps and vectors) +struct whisper_state * whisper_get_state_from_context(struct whisper_context * ctx) { + if (!ctx->state) { + return nullptr; + } + + return ctx->state; +} + +// whisper_get_timings_with_state +// Just a version of whisper_get_timings that takes state as a parameter +struct whisper_timings * whisper_get_timings_with_state(struct whisper_state * state) { + if (state == nullptr) { + return nullptr; + } + whisper_timings * timings = new whisper_timings; + timings->sample_ms = 1e-3f * state->t_sample_us / std::max(1, state->n_sample); + timings->encode_ms = 1e-3f * state->t_encode_us / std::max(1, state->n_encode); + timings->decode_ms = 1e-3f * state->t_decode_us / std::max(1, state->n_decode); + timings->batchd_ms = 1e-3f * state->t_batchd_us / std::max(1, state->n_batchd); + timings->prompt_ms = 1e-3f * state->t_prompt_us / std::max(1, state->n_prompt); + return timings; +} \ No newline at end of file From 0510bb764ed71c319014ff2b75dddfa79f3e4745 Mon Sep 17 00:00:00 2001 From: peardox Date: Fri, 25 Apr 2025 01:09:10 +0100 Subject: [PATCH 04/34] disable whisper_load_backends if GGML_BACKEND_DL - prevents the loadall --- ggml/src/ggml-opencl/ggml-opencl.cpp | 8 +++++--- src/whisper.cpp | 10 ++++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp index 723cab8b174..0a94a3522c1 100644 --- a/ggml/src/ggml-opencl/ggml-opencl.cpp +++ b/ggml/src/ggml-opencl/ggml-opencl.cpp @@ -2,9 +2,11 @@ #define CL_USE_DEPRECATED_OPENCL_1_2_APIS // suppress warnings in CL headers for GCC and Clang -#pragma GCC diagnostic ignored "-Woverlength-strings" -#ifdef __clang__ -#pragma GCC diagnostic ignored "-Wgnu-anonymous-struct" +#ifndef _MSC_VER + #pragma GCC diagnostic ignored "-Woverlength-strings" + #ifdef __clang__ + #pragma GCC diagnostic ignored "-Wgnu-anonymous-struct" + #endif #endif #include "ggml-opencl.h" diff --git a/src/whisper.cpp b/src/whisper.cpp index 94e2d0b5c7a..21065adc1fa 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -209,7 +209,7 @@ static bool ggml_graph_compute_helper( } static void whisper_load_backends() { -#ifdef GGML_BACKEND_DL +#ifndef GGML_BACKEND_DL static std::once_flag flag; std::call_once(flag, []() { ggml_backend_load_all(); @@ -1313,8 +1313,10 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) { static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) { ggml_log_set(g_state.log_callback, g_state.log_callback_user_data); + #ifndef GGML_BACKEND_DL whisper_load_backends(); - + #endif + ggml_backend_dev_t dev = nullptr; int cnt = 0; @@ -4321,7 +4323,9 @@ static int whisper_has_openvino(void) { const char * whisper_print_system_info(void) { static std::string s; + #ifndef GGML_BACKEND_DL whisper_load_backends(); + #endif s = ""; s += "WHISPER : "; @@ -6776,7 +6780,9 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) { } WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) { + #ifndef GGML_BACKEND_DL whisper_load_backends(); + #endif static std::string s; s = ""; From 1e7a15e4267ba71b4b93b677ee5e7dfd8b995137 Mon Sep 17 00:00:00 2001 From: peardox Date: Fri, 25 Apr 2025 05:48:52 +0100 Subject: [PATCH 05/34] Add whisper_flat_get_preferred_backend --- include/whisper.h | 1 + src/whisper-flat.cpp | 6 +++++- src/whisper-flat.h | 1 + src/whisper.cpp | 8 ++++++++ 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/whisper.h b/include/whisper.h index 
9639d69019a..fa084049ec3 100644 --- a/include/whisper.h +++ b/include/whisper.h @@ -673,6 +673,7 @@ extern "C" { const char * whisper_get_system_info_json(void); struct whisper_state * whisper_get_state_from_context(struct whisper_context * ctx); struct whisper_timings * whisper_get_timings_with_state(struct whisper_state * state); + ggml_backend_t whisper_get_preferred_backend(struct whisper_state * state); #ifdef __cplusplus } diff --git a/src/whisper-flat.cpp b/src/whisper-flat.cpp index 249eb01d5fd..1df91bcccea 100644 --- a/src/whisper-flat.cpp +++ b/src/whisper-flat.cpp @@ -43,4 +43,8 @@ struct whisper_state * whisper_flat_get_state_from_context(struct whisper_contex struct whisper_timings * whisper_flat_get_timings_with_state(struct whisper_state * state) { return whisper_get_timings_with_state(state); -} \ No newline at end of file +} + +ggml_backend_t whisper_flat_get_preferred_backend(struct whisper_state * state) { + return whisper_get_preferred_backend(state); +} diff --git a/src/whisper-flat.h b/src/whisper-flat.h index 55976acb59a..954be39f4d5 100644 --- a/src/whisper-flat.h +++ b/src/whisper-flat.h @@ -24,6 +24,7 @@ extern "C" { WHISPER_FLAT_API struct whisper_timings * whisper_flat_get_timings_with_state(struct whisper_state * state); WHISPER_FLAT_API struct whisper_state * whisper_flat_get_state_from_context(struct whisper_context * ctx); WHISPER_FLAT_API const char * whisper_flat_get_system_info_json(void); + WHISPER_FLAT_API ggml_backend_t whisper_flat_get_preferred_backend(struct whisper_state * state); #ifdef __cplusplus } diff --git a/src/whisper.cpp b/src/whisper.cpp index 21065adc1fa..ec9566d2e86 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -7629,4 +7629,12 @@ struct whisper_timings * whisper_get_timings_with_state(struct whisper_state * s timings->batchd_ms = 1e-3f * state->t_batchd_us / std::max(1, state->n_batchd); timings->prompt_ms = 1e-3f * state->t_prompt_us / std::max(1, state->n_prompt); return timings; +} + +ggml_backend_t whisper_get_preferred_backend(struct whisper_state * state) { + if (state->backends.empty()) { + return nullptr; + } + + return state->backends[0]; } \ No newline at end of file From 7307aa7464dbe81c43dfafc8568edc0ff366aa15 Mon Sep 17 00:00:00 2001 From: peardox Date: Sat, 26 Apr 2025 17:01:43 +0100 Subject: [PATCH 06/34] Add whisper_get_backend_count and whisper_get_indexed_backend --- include/whisper.h | 4 +++- src/whisper-flat.cpp | 8 ++++++++ src/whisper-flat.h | 2 ++ src/whisper.cpp | 14 ++++++++++++++ 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/include/whisper.h b/include/whisper.h index fa084049ec3..13c646aed50 100644 --- a/include/whisper.h +++ b/include/whisper.h @@ -674,7 +674,9 @@ extern "C" { struct whisper_state * whisper_get_state_from_context(struct whisper_context * ctx); struct whisper_timings * whisper_get_timings_with_state(struct whisper_state * state); ggml_backend_t whisper_get_preferred_backend(struct whisper_state * state); - + ggml_backend_t whisper_get_indexed_backend(struct whisper_state* state, int i); + size_t whisper_get_backend_count(struct whisper_state* state); + #ifdef __cplusplus } #endif diff --git a/src/whisper-flat.cpp b/src/whisper-flat.cpp index 1df91bcccea..a40fd51d225 100644 --- a/src/whisper-flat.cpp +++ b/src/whisper-flat.cpp @@ -48,3 +48,11 @@ struct whisper_timings * whisper_flat_get_timings_with_state(struct whisper_stat ggml_backend_t whisper_flat_get_preferred_backend(struct whisper_state * state) { return whisper_get_preferred_backend(state); } + +ggml_backend_t 
whisper_flat_get_indexed_backend(struct whisper_state* state, int i) { + return whisper_get_indexed_backend(state, i); +} + +size_t whisper_flat_get_backend_count(struct whisper_state* state) { + return whisper_get_backend_count(state); +} diff --git a/src/whisper-flat.h b/src/whisper-flat.h index 954be39f4d5..097ef5c38ba 100644 --- a/src/whisper-flat.h +++ b/src/whisper-flat.h @@ -25,6 +25,8 @@ extern "C" { WHISPER_FLAT_API struct whisper_state * whisper_flat_get_state_from_context(struct whisper_context * ctx); WHISPER_FLAT_API const char * whisper_flat_get_system_info_json(void); WHISPER_FLAT_API ggml_backend_t whisper_flat_get_preferred_backend(struct whisper_state * state); + WHISPER_FLAT_API ggml_backend_t whisper_flat_get_indexed_backend(struct whisper_state* state, int i); + WHISPER_FLAT_API size_t whisper_flat_get_backend_count(struct whisper_state* state); #ifdef __cplusplus } diff --git a/src/whisper.cpp b/src/whisper.cpp index ec9566d2e86..6b52eff0dde 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -7637,4 +7637,18 @@ ggml_backend_t whisper_get_preferred_backend(struct whisper_state * state) { } return state->backends[0]; +} + +ggml_backend_t whisper_get_indexed_backend(struct whisper_state* state, int i) { + if (state->backends.empty()) { + return nullptr; + } + if (i >= state->backends.size()) { + return nullptr; + } + return state->backends[i]; +} + +size_t whisper_get_backend_count(struct whisper_state* state) { + return state->backends.size(); } \ No newline at end of file From c37671c4ab453fef56403e68d3352ae1177f76e6 Mon Sep 17 00:00:00 2001 From: peardox Date: Sun, 27 Apr 2025 03:29:28 +0100 Subject: [PATCH 07/34] OK, really finished - just enable examples via GGML_BACKEND_DL + BINDINGS_FLAT --- examples/command/command.cpp | 7 +++++++ examples/server/server.cpp | 7 +++++++ examples/talk-llama/talk-llama.cpp | 7 +++++++ examples/wchess/wchess.cmd/wchess.cmd.cpp | 7 +++++++ 4 files changed, 28 insertions(+) diff --git a/examples/command/command.cpp b/examples/command/command.cpp index 9dc8f629995..77abf59bd30 100644 --- a/examples/command/command.cpp +++ b/examples/command/command.cpp @@ -9,6 +9,9 @@ #include "common-sdl.h" #include "common.h" #include "whisper.h" +#if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) +#include "whisper-flat.h" +#endif #include "grammar-parser.h" #include @@ -692,6 +695,10 @@ int main(int argc, char ** argv) { // whisper init + #if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) + whisper_flat_backend_load_all(); + #endif + struct whisper_context_params cparams = whisper_context_default_params(); cparams.use_gpu = params.use_gpu; diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 38da61673df..42225b0e76b 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2,6 +2,9 @@ #include "common-whisper.h" #include "whisper.h" +#if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) +#include "whisper-flat.h" +#endif #include "httplib.h" #include "json.hpp" @@ -544,6 +547,10 @@ int main(int argc, char ** argv) { check_ffmpeg_availibility(); } // whisper init + #if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) + whisper_flat_backend_load_all(); + #endif + struct whisper_context_params cparams = whisper_context_default_params(); cparams.use_gpu = params.use_gpu; diff --git a/examples/talk-llama/talk-llama.cpp b/examples/talk-llama/talk-llama.cpp index 9097c491b61..d41ab6f970e 100644 --- a/examples/talk-llama/talk-llama.cpp +++ b/examples/talk-llama/talk-llama.cpp @@ -5,6 +5,9 @@ 
#include "common.h" #include "common-whisper.h" #include "whisper.h" +#if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) +#include "whisper-flat.h" +#endif #include "llama.h" #include @@ -287,6 +290,10 @@ int main(int argc, char ** argv) { // whisper init + #if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) + whisper_flat_backend_load_all(); + #endif + struct whisper_context_params cparams = whisper_context_default_params(); cparams.use_gpu = params.use_gpu; diff --git a/examples/wchess/wchess.cmd/wchess.cmd.cpp b/examples/wchess/wchess.cmd/wchess.cmd.cpp index 4d049976315..76b1b8ccfd0 100644 --- a/examples/wchess/wchess.cmd/wchess.cmd.cpp +++ b/examples/wchess/wchess.cmd/wchess.cmd.cpp @@ -7,6 +7,9 @@ #include "WChess.h" #include "common-sdl.h" +#if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) +#include "whisper-flat.h" +#endif #include #include @@ -182,6 +185,10 @@ int main(int argc, char ** argv) { // whisper init + #if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) + whisper_flat_backend_load_all(); + #endif + struct whisper_context_params cparams = whisper_context_default_params(); cparams.use_gpu = params.use_gpu; From c859e240a86829e257d90b2cef7525df7d3aa634 Mon Sep 17 00:00:00 2001 From: peardox Date: Sun, 27 Apr 2025 04:23:13 +0100 Subject: [PATCH 08/34] Change type to size_t --- include/whisper.h | 2 +- src/whisper-flat.cpp | 2 +- src/whisper-flat.h | 2 +- src/whisper.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/whisper.h b/include/whisper.h index 13c646aed50..37b43b2c073 100644 --- a/include/whisper.h +++ b/include/whisper.h @@ -674,7 +674,7 @@ extern "C" { struct whisper_state * whisper_get_state_from_context(struct whisper_context * ctx); struct whisper_timings * whisper_get_timings_with_state(struct whisper_state * state); ggml_backend_t whisper_get_preferred_backend(struct whisper_state * state); - ggml_backend_t whisper_get_indexed_backend(struct whisper_state* state, int i); + ggml_backend_t whisper_get_indexed_backend(struct whisper_state* state, size_t i); size_t whisper_get_backend_count(struct whisper_state* state); #ifdef __cplusplus diff --git a/src/whisper-flat.cpp b/src/whisper-flat.cpp index a40fd51d225..33f51d67403 100644 --- a/src/whisper-flat.cpp +++ b/src/whisper-flat.cpp @@ -49,7 +49,7 @@ ggml_backend_t whisper_flat_get_preferred_backend(struct whisper_state * state) return whisper_get_preferred_backend(state); } -ggml_backend_t whisper_flat_get_indexed_backend(struct whisper_state* state, int i) { +ggml_backend_t whisper_flat_get_indexed_backend(struct whisper_state* state, size_t i) { return whisper_get_indexed_backend(state, i); } diff --git a/src/whisper-flat.h b/src/whisper-flat.h index 097ef5c38ba..09909b8e91d 100644 --- a/src/whisper-flat.h +++ b/src/whisper-flat.h @@ -25,7 +25,7 @@ extern "C" { WHISPER_FLAT_API struct whisper_state * whisper_flat_get_state_from_context(struct whisper_context * ctx); WHISPER_FLAT_API const char * whisper_flat_get_system_info_json(void); WHISPER_FLAT_API ggml_backend_t whisper_flat_get_preferred_backend(struct whisper_state * state); - WHISPER_FLAT_API ggml_backend_t whisper_flat_get_indexed_backend(struct whisper_state* state, int i); + WHISPER_FLAT_API ggml_backend_t whisper_flat_get_indexed_backend(struct whisper_state* state, size_t i); WHISPER_FLAT_API size_t whisper_flat_get_backend_count(struct whisper_state* state); #ifdef __cplusplus diff --git a/src/whisper.cpp b/src/whisper.cpp index 6b52eff0dde..f30c15902e0 100644 --- a/src/whisper.cpp +++ 
b/src/whisper.cpp @@ -7639,7 +7639,7 @@ ggml_backend_t whisper_get_preferred_backend(struct whisper_state * state) { return state->backends[0]; } -ggml_backend_t whisper_get_indexed_backend(struct whisper_state* state, int i) { +ggml_backend_t whisper_get_indexed_backend(struct whisper_state* state, size_t i) { if (state->backends.empty()) { return nullptr; } From a0ccffa208c7a966b9eb3c8ad0b3244cf00c4475 Mon Sep 17 00:00:00 2001 From: peardox Date: Sun, 27 Apr 2025 04:34:17 +0100 Subject: [PATCH 09/34] Mac needs can't use GGML_BACKEND_DL, modify as needed --- examples/server/server.cpp | 4 ++-- examples/stream/stream.cpp | 4 ++-- examples/talk-llama/talk-llama.cpp | 4 ++-- examples/wchess/wchess.cmd/wchess.cmd.cpp | 4 ++-- src/whisper.cpp | 10 +++++----- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 42225b0e76b..230f6980003 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2,7 +2,7 @@ #include "common-whisper.h" #include "whisper.h" -#if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) +#if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) #include "whisper-flat.h" #endif #include "httplib.h" @@ -547,7 +547,7 @@ int main(int argc, char ** argv) { check_ffmpeg_availibility(); } // whisper init - #if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) + #if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) whisper_flat_backend_load_all(); #endif diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index 5dc0c2eee20..2505ffb79d9 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -6,7 +6,7 @@ #include "common.h" #include "common-whisper.h" #include "whisper.h" -#if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) +#if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) #include "whisper-flat.h" #endif @@ -158,7 +158,7 @@ int main(int argc, char ** argv) { exit(0); } - #if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) + #if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) whisper_flat_backend_load_all(); #endif diff --git a/examples/talk-llama/talk-llama.cpp b/examples/talk-llama/talk-llama.cpp index d41ab6f970e..3b131590cd5 100644 --- a/examples/talk-llama/talk-llama.cpp +++ b/examples/talk-llama/talk-llama.cpp @@ -5,7 +5,7 @@ #include "common.h" #include "common-whisper.h" #include "whisper.h" -#if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) +#if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) #include "whisper-flat.h" #endif #include "llama.h" @@ -290,7 +290,7 @@ int main(int argc, char ** argv) { // whisper init - #if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) + #if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) whisper_flat_backend_load_all(); #endif diff --git a/examples/wchess/wchess.cmd/wchess.cmd.cpp b/examples/wchess/wchess.cmd/wchess.cmd.cpp index 76b1b8ccfd0..7f60f262c7f 100644 --- a/examples/wchess/wchess.cmd/wchess.cmd.cpp +++ b/examples/wchess/wchess.cmd/wchess.cmd.cpp @@ -7,7 +7,7 @@ #include "WChess.h" #include "common-sdl.h" -#if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) +#if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) #include "whisper-flat.h" #endif #include @@ -185,7 +185,7 @@ int main(int argc, char ** argv) { // whisper init - #if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) + #if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) whisper_flat_backend_load_all(); #endif diff --git a/src/whisper.cpp b/src/whisper.cpp index f30c15902e0..8bec3e2258b 100644 --- 
a/src/whisper.cpp +++ b/src/whisper.cpp @@ -209,12 +209,12 @@ static bool ggml_graph_compute_helper( } static void whisper_load_backends() { -#ifndef GGML_BACKEND_DL + #if !defined(GGML_BACKEND_DL) or !defined(BINDINGS_FLAT) static std::once_flag flag; std::call_once(flag, []() { ggml_backend_load_all(); }); -#endif + #endif } // TODO: move these functions to ggml-base with support for ggml-backend? @@ -1313,7 +1313,7 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) { static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) { ggml_log_set(g_state.log_callback, g_state.log_callback_user_data); - #ifndef GGML_BACKEND_DL + #if !defined(GGML_BACKEND_DL) or !defined(BINDINGS_FLAT) whisper_load_backends(); #endif @@ -4323,7 +4323,7 @@ static int whisper_has_openvino(void) { const char * whisper_print_system_info(void) { static std::string s; - #ifndef GGML_BACKEND_DL + #if !defined(GGML_BACKEND_DL) or !defined(BINDINGS_FLAT) whisper_load_backends(); #endif @@ -6780,7 +6780,7 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) { } WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) { - #ifndef GGML_BACKEND_DL + #if !defined(GGML_BACKEND_DL) or !defined(BINDINGS_FLAT) whisper_load_backends(); #endif From b1ecac42a7e30b6c73b5c76aa3b657eb40dbc2f4 Mon Sep 17 00:00:00 2001 From: peardox Date: Sun, 27 Apr 2025 13:42:34 +0100 Subject: [PATCH 10/34] Forgot a few demos --- examples/bench/bench.cpp | 4 ++-- examples/cli/cli.cpp | 4 ++-- examples/command/command.cpp | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index dbe527c86fb..e48533ce597 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -1,5 +1,5 @@ #include "whisper.h" -#if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) +#if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) #include "whisper-flat.h" #endif @@ -64,7 +64,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para static int whisper_bench_full(const whisper_params & params) { // whisper init - #if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) + #if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) whisper_flat_backend_load_all(); #endif diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp index 07ffcac93c2..46217672748 100644 --- a/examples/cli/cli.cpp +++ b/examples/cli/cli.cpp @@ -2,7 +2,7 @@ #include "common-whisper.h" #include "whisper.h" -#if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) +#if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) #include "whisper-flat.h" #endif #include "grammar-parser.h" @@ -1007,7 +1007,7 @@ int main(int argc, char ** argv) { // whisper init - #if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) + #if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) whisper_flat_backend_load_all(); #endif diff --git a/examples/command/command.cpp b/examples/command/command.cpp index 77abf59bd30..3cbc892a206 100644 --- a/examples/command/command.cpp +++ b/examples/command/command.cpp @@ -9,7 +9,7 @@ #include "common-sdl.h" #include "common.h" #include "whisper.h" -#if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) +#if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) #include "whisper-flat.h" #endif #include "grammar-parser.h" @@ -695,7 +695,7 @@ int main(int argc, char ** argv) { // whisper init - #if defined(GGML_BACKEND_DL) and defined(BINDINGS_FLAT) + #if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) 
whisper_flat_backend_load_all(); #endif From 7ddd2f3504441cc7f00bfa3d850942e349190318 Mon Sep 17 00:00:00 2001 From: peardox Date: Mon, 28 Apr 2025 18:38:39 +0100 Subject: [PATCH 11/34] Deployment tests --- src/whisper.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/whisper.cpp b/src/whisper.cpp index 8bec3e2258b..bbff38e3764 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -208,14 +208,14 @@ static bool ggml_graph_compute_helper( return t; } +#if !defined(BINDINGS_FLAT) static void whisper_load_backends() { - #if !defined(GGML_BACKEND_DL) or !defined(BINDINGS_FLAT) static std::once_flag flag; std::call_once(flag, []() { ggml_backend_load_all(); }); - #endif } +#endif // TODO: move these functions to ggml-base with support for ggml-backend? @@ -1313,7 +1313,7 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) { static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) { ggml_log_set(g_state.log_callback, g_state.log_callback_user_data); - #if !defined(GGML_BACKEND_DL) or !defined(BINDINGS_FLAT) + #if !defined(BINDINGS_FLAT) whisper_load_backends(); #endif @@ -4323,7 +4323,7 @@ static int whisper_has_openvino(void) { const char * whisper_print_system_info(void) { static std::string s; - #if !defined(GGML_BACKEND_DL) or !defined(BINDINGS_FLAT) + #if !defined(BINDINGS_FLAT) whisper_load_backends(); #endif @@ -6780,7 +6780,7 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) { } WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) { - #if !defined(GGML_BACKEND_DL) or !defined(BINDINGS_FLAT) + #if !defined(BINDINGS_FLAT) whisper_load_backends(); #endif From 65d7b749f4e687c5a5c1a9273c01b1ed91726618 Mon Sep 17 00:00:00 2001 From: peardox Date: Mon, 28 Apr 2025 20:05:49 +0100 Subject: [PATCH 12/34] fix cmakelists.txt --- examples/bench/bench.cpp | 3 ++- src/CMakeLists.txt | 1 + src/whisper.cpp | 8 ++++---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index e48533ce597..88bf2b586ea 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -65,7 +65,8 @@ static int whisper_bench_full(const whisper_params & params) { // whisper init #if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) - whisper_flat_backend_load_all(); + //whisper_flat_backend_load_all(); + ggml_backend_try_load_best("cpu", nullptr); #endif struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5f90e81a3ac..2509517ce99 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -114,6 +114,7 @@ if(BINDINGS_FLAT) ) list(APPEND WHISPER_LIBRARY_SOURCES ${FLAT_WHISPER_SOURCES}) + set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DBINDINGS_FLAT) endif() add_library(whisper diff --git a/src/whisper.cpp b/src/whisper.cpp index bbff38e3764..7175dd1aade 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -208,7 +208,7 @@ static bool ggml_graph_compute_helper( return t; } -#if !defined(BINDINGS_FLAT) +#ifndef BINDINGS_FLAT static void whisper_load_backends() { static std::once_flag flag; std::call_once(flag, []() { @@ -1313,7 +1313,7 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) { static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) { ggml_log_set(g_state.log_callback, g_state.log_callback_user_data); - #if !defined(BINDINGS_FLAT) + #ifndef BINDINGS_FLAT whisper_load_backends(); #endif @@ 
-4323,7 +4323,7 @@ static int whisper_has_openvino(void) { const char * whisper_print_system_info(void) { static std::string s; - #if !defined(BINDINGS_FLAT) + #ifndef BINDINGS_FLAT whisper_load_backends(); #endif @@ -6780,7 +6780,7 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) { } WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) { - #if !defined(BINDINGS_FLAT) + #ifndef BINDINGS_FLAT whisper_load_backends(); #endif From 53943bb941c5646af3805002db41072159679436 Mon Sep 17 00:00:00 2001 From: peardox Date: Mon, 28 Apr 2025 21:09:24 +0100 Subject: [PATCH 13/34] Fix nullptr --- src/whisper.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/whisper.cpp b/src/whisper.cpp index 7175dd1aade..2d13e993a38 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -1323,6 +1323,9 @@ static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & pa if (params.use_gpu) { for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { ggml_backend_dev_t dev_cur = ggml_backend_dev_get(i); + if(dev_cur == nullptr) { + continue; + } if (ggml_backend_dev_type(dev_cur) == GGML_BACKEND_DEVICE_TYPE_GPU) { if (cnt == 0 || cnt == params.gpu_device) { dev = dev_cur; @@ -1361,6 +1364,9 @@ static std::vector whisper_backend_init(const whisper_context_pa // ACCEL backends for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { ggml_backend_dev_t dev = ggml_backend_dev_get(i); + if(dev == nullptr) { + continue; + } if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_ACCEL) { WHISPER_LOG_INFO("%s: using %s backend\n", __func__, ggml_backend_dev_name(dev)); ggml_backend_t backend = ggml_backend_dev_init(dev, nullptr); @@ -1392,6 +1398,9 @@ static buft_list_t make_buft_list(whisper_context_params & params) { int cnt = 0; for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { ggml_backend_dev_t dev = ggml_backend_dev_get(i); + if(dev == nullptr) { + continue; + } if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) { if (cnt == 0 || cnt == params.gpu_device) { auto * buft = ggml_backend_dev_buffer_type(dev); From 2ce178bf9bf487288b537c0fc186c4a567a005d0 Mon Sep 17 00:00:00 2001 From: peardox Date: Tue, 29 Apr 2025 09:23:26 +0100 Subject: [PATCH 14/34] Merge Timings into Activity --- CMakeLists.txt | 3 ++- examples/bench/bench.cpp | 4 +-- examples/cli/cli.cpp | 4 +-- examples/command/command.cpp | 4 +-- examples/server/server.cpp | 4 +-- examples/stream/stream.cpp | 4 +-- examples/talk-llama/talk-llama.cpp | 4 +-- examples/wchess/wchess.cmd/wchess.cmd.cpp | 4 +-- include/whisper.h | 17 ++++++++++++- src/whisper-flat.cpp | 6 +++-- src/whisper-flat.h | 4 ++- src/whisper.cpp | 31 +++++++++++++++-------- 12 files changed, 59 insertions(+), 30 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4fec3332aa3..bb3c680afcb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,7 +71,8 @@ option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF) option(WHISPER_USE_SYSTEM_GGML "whisper: use system-installed GGML library" OFF) # flat bindings -option(BINDINGS_FLAT "Add extra flat definitions to Whisper + GGML" OFF) +option(BINDINGS_FLAT "Add extra flat definitions to Whisper + GGML" OFF) +option(GGML_BINDINGS_FLAT "Add extra flat definitions to Examples" ${BINDINGS_FLAT}) # sanitizers option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index 88bf2b586ea..360df1047a8 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -1,5 +1,5 @@ 
#include "whisper.h" -#if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) +#ifdef GGML_BINDINGS_FLAT #include "whisper-flat.h" #endif @@ -64,7 +64,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para static int whisper_bench_full(const whisper_params & params) { // whisper init - #if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) + #ifdef GGML_BINDINGS_FLAT //whisper_flat_backend_load_all(); ggml_backend_try_load_best("cpu", nullptr); #endif diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp index 46217672748..2e0d0412eaf 100644 --- a/examples/cli/cli.cpp +++ b/examples/cli/cli.cpp @@ -2,7 +2,7 @@ #include "common-whisper.h" #include "whisper.h" -#if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) +#ifdef GGML_BINDINGS_FLAT #include "whisper-flat.h" #endif #include "grammar-parser.h" @@ -1007,7 +1007,7 @@ int main(int argc, char ** argv) { // whisper init - #if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) + #ifdef GGML_BINDINGS_FLAT whisper_flat_backend_load_all(); #endif diff --git a/examples/command/command.cpp b/examples/command/command.cpp index 3cbc892a206..23d722256fa 100644 --- a/examples/command/command.cpp +++ b/examples/command/command.cpp @@ -9,7 +9,7 @@ #include "common-sdl.h" #include "common.h" #include "whisper.h" -#if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) +#ifdef GGML_BINDINGS_FLAT #include "whisper-flat.h" #endif #include "grammar-parser.h" @@ -695,7 +695,7 @@ int main(int argc, char ** argv) { // whisper init - #if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) + #ifdef GGML_BINDINGS_FLAT whisper_flat_backend_load_all(); #endif diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 230f6980003..7f510c3e0a3 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2,7 +2,7 @@ #include "common-whisper.h" #include "whisper.h" -#if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) +#ifdef GGML_BINDINGS_FLAT #include "whisper-flat.h" #endif #include "httplib.h" @@ -547,7 +547,7 @@ int main(int argc, char ** argv) { check_ffmpeg_availibility(); } // whisper init - #if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) + #ifdef GGML_BINDINGS_FLAT whisper_flat_backend_load_all(); #endif diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index 2505ffb79d9..73249d67968 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -6,7 +6,7 @@ #include "common.h" #include "common-whisper.h" #include "whisper.h" -#if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) +#ifdef GGML_BINDINGS_FLAT #include "whisper-flat.h" #endif @@ -158,7 +158,7 @@ int main(int argc, char ** argv) { exit(0); } - #if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) + #ifdef GGML_BINDINGS_FLAT whisper_flat_backend_load_all(); #endif diff --git a/examples/talk-llama/talk-llama.cpp b/examples/talk-llama/talk-llama.cpp index 3b131590cd5..13cdb041df0 100644 --- a/examples/talk-llama/talk-llama.cpp +++ b/examples/talk-llama/talk-llama.cpp @@ -5,7 +5,7 @@ #include "common.h" #include "common-whisper.h" #include "whisper.h" -#if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) +#ifdef GGML_BINDINGS_FLAT #include "whisper-flat.h" #endif #include "llama.h" @@ -290,7 +290,7 @@ int main(int argc, char ** argv) { // whisper init - #if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) + #ifdef GGML_BINDINGS_FLAT whisper_flat_backend_load_all(); #endif diff --git a/examples/wchess/wchess.cmd/wchess.cmd.cpp b/examples/wchess/wchess.cmd/wchess.cmd.cpp index 
7f60f262c7f..33a6cf0d458 100644 --- a/examples/wchess/wchess.cmd/wchess.cmd.cpp +++ b/examples/wchess/wchess.cmd/wchess.cmd.cpp @@ -7,7 +7,7 @@ #include "WChess.h" #include "common-sdl.h" -#if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) +#ifdef GGML_BINDINGS_FLAT #include "whisper-flat.h" #endif #include @@ -185,7 +185,7 @@ int main(int argc, char ** argv) { // whisper init - #if defined(GGML_BACKEND_DL) or defined(BINDINGS_FLAT) + #ifdef GGML_BINDINGS_FLAT whisper_flat_backend_load_all(); #endif diff --git a/include/whisper.h b/include/whisper.h index 37b43b2c073..bcd448b8240 100644 --- a/include/whisper.h +++ b/include/whisper.h @@ -670,12 +670,27 @@ extern "C" { WHISPER_API float whisper_full_get_segment_no_speech_prob_from_state(struct whisper_state * state, int i_segment); // For whisper-flat.cpp to expose + #ifdef BINDINGS_FLAT + struct whisper_activity { + float sample_ms; + float encode_ms; + float decode_ms; + float batchd_ms; + float prompt_ms; + int32_t n_sample = 0; // number of tokens sampled + int32_t n_encode = 0; // number of encoder calls + int32_t n_decode = 0; // number of decoder calls with n_tokens == 1 (text-generation) + int32_t n_batchd = 0; // number of decoder calls with n_tokens < 16 (batch decoding) + int32_t n_prompt = 0; // number of decoder calls with n_tokens > 1 (prompt encoding) + }; + const char * whisper_get_system_info_json(void); struct whisper_state * whisper_get_state_from_context(struct whisper_context * ctx); - struct whisper_timings * whisper_get_timings_with_state(struct whisper_state * state); + struct whisper_activity * whisper_get_activity_with_state(struct whisper_state * state); ggml_backend_t whisper_get_preferred_backend(struct whisper_state * state); ggml_backend_t whisper_get_indexed_backend(struct whisper_state* state, size_t i); size_t whisper_get_backend_count(struct whisper_state* state); + #endif #ifdef __cplusplus } diff --git a/src/whisper-flat.cpp b/src/whisper-flat.cpp index 33f51d67403..9b8474af9f2 100644 --- a/src/whisper-flat.cpp +++ b/src/whisper-flat.cpp @@ -29,6 +29,7 @@ #include "whisper-flat.h" +#ifdef BINDINGS_FLAT void whisper_flat_backend_load_all(void) { ggml_backend_load_all(); } @@ -41,8 +42,8 @@ struct whisper_state * whisper_flat_get_state_from_context(struct whisper_contex return whisper_get_state_from_context(ctx); } -struct whisper_timings * whisper_flat_get_timings_with_state(struct whisper_state * state) { - return whisper_get_timings_with_state(state); +struct whisper_activity * whisper_flat_get_activity_with_state(struct whisper_state * state) { + return whisper_get_activity_with_state(state); } ggml_backend_t whisper_flat_get_preferred_backend(struct whisper_state * state) { @@ -56,3 +57,4 @@ ggml_backend_t whisper_flat_get_indexed_backend(struct whisper_state* state, siz size_t whisper_flat_get_backend_count(struct whisper_state* state) { return whisper_get_backend_count(state); } +#endif diff --git a/src/whisper-flat.h b/src/whisper-flat.h index 09909b8e91d..39429405211 100644 --- a/src/whisper-flat.h +++ b/src/whisper-flat.h @@ -20,13 +20,15 @@ extern "C" { #endif + #ifdef BINDINGS_FLAT WHISPER_FLAT_API void whisper_flat_backend_load_all(void); - WHISPER_FLAT_API struct whisper_timings * whisper_flat_get_timings_with_state(struct whisper_state * state); + WHISPER_FLAT_API struct whisper_activity * whisper_flat_get_activity_with_state(struct whisper_state * state); WHISPER_FLAT_API struct whisper_state * whisper_flat_get_state_from_context(struct whisper_context * ctx); WHISPER_FLAT_API const 
char * whisper_flat_get_system_info_json(void); WHISPER_FLAT_API ggml_backend_t whisper_flat_get_preferred_backend(struct whisper_state * state); WHISPER_FLAT_API ggml_backend_t whisper_flat_get_indexed_backend(struct whisper_state* state, size_t i); WHISPER_FLAT_API size_t whisper_flat_get_backend_count(struct whisper_state* state); + #endif #ifdef __cplusplus } diff --git a/src/whisper.cpp b/src/whisper.cpp index 2d13e993a38..7ea03f5e9bb 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -7566,6 +7566,7 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text fflush(stderr); } +#ifdef BINDINGS_FLAT // whisper_get_system_info_json // Returns system info as json, useful for language bindings // NOTE : While testing features->value always returned an int. @@ -7625,19 +7626,26 @@ struct whisper_state * whisper_get_state_from_context(struct whisper_context * c return ctx->state; } -// whisper_get_timings_with_state -// Just a version of whisper_get_timings that takes state as a parameter -struct whisper_timings * whisper_get_timings_with_state(struct whisper_state * state) { +// whisper_get_activity_with_state +// As the data is in a c++ specific struct +struct whisper_activity * whisper_get_activity_with_state(struct whisper_state * state) { if (state == nullptr) { return nullptr; } - whisper_timings * timings = new whisper_timings; - timings->sample_ms = 1e-3f * state->t_sample_us / std::max(1, state->n_sample); - timings->encode_ms = 1e-3f * state->t_encode_us / std::max(1, state->n_encode); - timings->decode_ms = 1e-3f * state->t_decode_us / std::max(1, state->n_decode); - timings->batchd_ms = 1e-3f * state->t_batchd_us / std::max(1, state->n_batchd); - timings->prompt_ms = 1e-3f * state->t_prompt_us / std::max(1, state->n_prompt); - return timings; + whisper_activity * activity = new whisper_activity; + + activity->sample_ms = 1e-3f * state->t_sample_us / std::max(1, state->n_sample); + activity->encode_ms = 1e-3f * state->t_encode_us / std::max(1, state->n_encode); + activity->decode_ms = 1e-3f * state->t_decode_us / std::max(1, state->n_decode); + activity->batchd_ms = 1e-3f * state->t_batchd_us / std::max(1, state->n_batchd); + activity->prompt_ms = 1e-3f * state->t_prompt_us / std::max(1, state->n_prompt); + activity->n_sample = state->n_sample; + activity->n_encode = state->n_encode; + activity->n_decode = state->n_decode; + activity->n_batchd = state->n_batchd; + activity->n_prompt = state->n_prompt; + + return activity; } ggml_backend_t whisper_get_preferred_backend(struct whisper_state * state) { @@ -7660,4 +7668,5 @@ ggml_backend_t whisper_get_indexed_backend(struct whisper_state* state, size_t i size_t whisper_get_backend_count(struct whisper_state* state) { return state->backends.size(); -} \ No newline at end of file +} +#endif From 0e1ebc0c9223451953a1bfcfee4f3c40767a58c9 Mon Sep 17 00:00:00 2001 From: peardox Date: Tue, 29 Apr 2025 10:13:25 +0100 Subject: [PATCH 15/34] Show ggml_backend_try_load_best in debug --- examples/bench/bench.cpp | 12 ++++++++---- ggml/src/ggml-backend-reg.cpp | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index 360df1047a8..a06928e2fa9 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -64,10 +64,14 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para static int whisper_bench_full(const whisper_params & params) { // whisper init - #ifdef GGML_BINDINGS_FLAT - //whisper_flat_backend_load_all(); - 
ggml_backend_try_load_best("cpu", nullptr); - #endif + // #ifdef GGML_BINDINGS_FLAT + // whisper_flat_backend_load_all(); + if(ggml_backend_try_load_best("cpu", nullptr)) { + fprintf(stderr, "ggml_backend_try_load_best(cpu, nullptr);"); + } else { + fprintf(stderr, "FAILED : ggml_backend_try_load_best(cpu, nullptr);"); + } + // #endif struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp index 40ba454b695..66d3542cacc 100644 --- a/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp @@ -591,6 +591,6 @@ ggml_backend_reg_t ggml_backend_try_load_best(const char * name, const char * di #else bool silent = false; #endif - + fprintf(stderr, "Performing ggml_backend_try_load_best(%s)\n", name); return ggml_backend_load_best(name, silent, dir_path); } From d381617e6d9ce2b06def0ad73ea4ee675dc44f56 Mon Sep 17 00:00:00 2001 From: peardox Date: Tue, 29 Apr 2025 16:26:26 +0100 Subject: [PATCH 16/34] Show some var sizes --- src/whisper.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/whisper.cpp b/src/whisper.cpp index 7ea03f5e9bb..48b5df90a16 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -3732,10 +3732,10 @@ struct whisper_context * whisper_init_with_params_no_state(struct whisper_model_ params.dtw_token_timestamps = false; } - WHISPER_LOG_INFO("%s: use gpu = %d\n", __func__, params.use_gpu); - WHISPER_LOG_INFO("%s: flash attn = %d\n", __func__, params.flash_attn); - WHISPER_LOG_INFO("%s: gpu_device = %d\n", __func__, params.gpu_device); - WHISPER_LOG_INFO("%s: dtw = %d\n", __func__, params.dtw_token_timestamps); + WHISPER_LOG_INFO("%s: use gpu = %d (size : %d)\n", __func__, params.use_gpu, sizeof(params.use_gpu)); + WHISPER_LOG_INFO("%s: flash attn = %d (size : %d)\n", __func__, params.flash_attn, sizeof(params.flash_attn)); + WHISPER_LOG_INFO("%s: gpu_device = %d (size : %d)\n", __func__, params.gpu_device, sizeof(params.gpu_device)); + WHISPER_LOG_INFO("%s: dtw = %d (size : %d)\n", __func__, params.dtw_token_timestamps, sizeof(params.dtw_token_timestamps)); WHISPER_LOG_INFO("%s: devices = %zu\n", __func__, ggml_backend_dev_count()); WHISPER_LOG_INFO("%s: backends = %zu\n", __func__, ggml_backend_reg_count()); From 6b51a3bfe04a65780b92b9ab3ca8cf6cb7d1c6eb Mon Sep 17 00:00:00 2001 From: peardox Date: Tue, 29 Apr 2025 17:46:03 +0100 Subject: [PATCH 17/34] Remove sizez again --- src/whisper.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/whisper.cpp b/src/whisper.cpp index 48b5df90a16..7ea03f5e9bb 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -3732,10 +3732,10 @@ struct whisper_context * whisper_init_with_params_no_state(struct whisper_model_ params.dtw_token_timestamps = false; } - WHISPER_LOG_INFO("%s: use gpu = %d (size : %d)\n", __func__, params.use_gpu, sizeof(params.use_gpu)); - WHISPER_LOG_INFO("%s: flash attn = %d (size : %d)\n", __func__, params.flash_attn, sizeof(params.flash_attn)); - WHISPER_LOG_INFO("%s: gpu_device = %d (size : %d)\n", __func__, params.gpu_device, sizeof(params.gpu_device)); - WHISPER_LOG_INFO("%s: dtw = %d (size : %d)\n", __func__, params.dtw_token_timestamps, sizeof(params.dtw_token_timestamps)); + WHISPER_LOG_INFO("%s: use gpu = %d\n", __func__, params.use_gpu); + WHISPER_LOG_INFO("%s: flash attn = %d\n", __func__, params.flash_attn); + WHISPER_LOG_INFO("%s: gpu_device = %d\n", __func__, params.gpu_device); + WHISPER_LOG_INFO("%s: dtw = %d\n", __func__, params.dtw_token_timestamps); 
WHISPER_LOG_INFO("%s: devices = %zu\n", __func__, ggml_backend_dev_count()); WHISPER_LOG_INFO("%s: backends = %zu\n", __func__, ggml_backend_reg_count()); From 660bee1e244c0bb0ca0f6906615b5fcfaaf8e245 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 30 Apr 2025 16:42:36 +0100 Subject: [PATCH 18/34] Enable BACKEND_FLAT in 6 demos --- examples/CMakeLists.txt | 6 ++++++ examples/bench/bench.cpp | 13 ++++--------- examples/cli/cli.cpp | 4 ++-- examples/command/command.cpp | 4 ++-- examples/server/server.cpp | 4 ++-- examples/stream/stream.cpp | 4 ++-- examples/talk-llama/talk-llama.cpp | 4 ++-- examples/wchess/wchess.cmd/wchess.cmd.cpp | 4 ++-- ggml/src/CMakeLists.txt | 2 +- ggml/src/ggml-flat.cpp | 4 ++++ ggml/src/ggml-flat.h | 4 +++- 11 files changed, 30 insertions(+), 23 deletions(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index e4265affe97..0adebb5b6ab 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -91,6 +91,12 @@ target_include_directories(json_cpp INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) # examples +if(BINDINGS_FLAT) + message(STATUS "Adding FLAT binding to examples") + add_compile_options(-DBINDINGS_FLAT) +endif() + + include_directories(${CMAKE_CURRENT_SOURCE_DIR}) if (EMSCRIPTEN) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index a06928e2fa9..9547735127a 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -1,5 +1,5 @@ #include "whisper.h" -#ifdef GGML_BINDINGS_FLAT +#ifdef BINDINGS_FLAT #include "whisper-flat.h" #endif @@ -64,14 +64,9 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para static int whisper_bench_full(const whisper_params & params) { // whisper init - // #ifdef GGML_BINDINGS_FLAT - // whisper_flat_backend_load_all(); - if(ggml_backend_try_load_best("cpu", nullptr)) { - fprintf(stderr, "ggml_backend_try_load_best(cpu, nullptr);"); - } else { - fprintf(stderr, "FAILED : ggml_backend_try_load_best(cpu, nullptr);"); - } - // #endif + #ifdef BINDINGS_FLAT + whisper_flat_backend_load_all(); + #endif struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp index 2e0d0412eaf..52459de4d38 100644 --- a/examples/cli/cli.cpp +++ b/examples/cli/cli.cpp @@ -2,7 +2,7 @@ #include "common-whisper.h" #include "whisper.h" -#ifdef GGML_BINDINGS_FLAT +#ifdef BINDINGS_FLAT #include "whisper-flat.h" #endif #include "grammar-parser.h" @@ -1007,7 +1007,7 @@ int main(int argc, char ** argv) { // whisper init - #ifdef GGML_BINDINGS_FLAT + #ifdef BINDINGS_FLAT whisper_flat_backend_load_all(); #endif diff --git a/examples/command/command.cpp b/examples/command/command.cpp index 23d722256fa..33a4c289d9c 100644 --- a/examples/command/command.cpp +++ b/examples/command/command.cpp @@ -9,7 +9,7 @@ #include "common-sdl.h" #include "common.h" #include "whisper.h" -#ifdef GGML_BINDINGS_FLAT +#ifdef BINDINGS_FLAT #include "whisper-flat.h" #endif #include "grammar-parser.h" @@ -695,7 +695,7 @@ int main(int argc, char ** argv) { // whisper init - #ifdef GGML_BINDINGS_FLAT + #ifdef BINDINGS_FLAT whisper_flat_backend_load_all(); #endif diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 7f510c3e0a3..2c8966823aa 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2,7 +2,7 @@ #include "common-whisper.h" #include "whisper.h" -#ifdef GGML_BINDINGS_FLAT +#ifdef BINDINGS_FLAT #include "whisper-flat.h" #endif #include "httplib.h" @@ -547,7 +547,7 @@ int main(int argc, char ** argv) { 
check_ffmpeg_availibility(); } // whisper init - #ifdef GGML_BINDINGS_FLAT + #ifdef BINDINGS_FLAT whisper_flat_backend_load_all(); #endif diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index 73249d67968..085a44fd9ef 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -6,7 +6,7 @@ #include "common.h" #include "common-whisper.h" #include "whisper.h" -#ifdef GGML_BINDINGS_FLAT +#ifdef BINDINGS_FLAT #include "whisper-flat.h" #endif @@ -158,7 +158,7 @@ int main(int argc, char ** argv) { exit(0); } - #ifdef GGML_BINDINGS_FLAT + #ifdef BINDINGS_FLAT whisper_flat_backend_load_all(); #endif diff --git a/examples/talk-llama/talk-llama.cpp b/examples/talk-llama/talk-llama.cpp index 13cdb041df0..21d3a950a8a 100644 --- a/examples/talk-llama/talk-llama.cpp +++ b/examples/talk-llama/talk-llama.cpp @@ -5,7 +5,7 @@ #include "common.h" #include "common-whisper.h" #include "whisper.h" -#ifdef GGML_BINDINGS_FLAT +#ifdef BINDINGS_FLAT #include "whisper-flat.h" #endif #include "llama.h" @@ -290,7 +290,7 @@ int main(int argc, char ** argv) { // whisper init - #ifdef GGML_BINDINGS_FLAT + #ifdef BINDINGS_FLAT whisper_flat_backend_load_all(); #endif diff --git a/examples/wchess/wchess.cmd/wchess.cmd.cpp b/examples/wchess/wchess.cmd/wchess.cmd.cpp index 33a6cf0d458..a1a154a9dcc 100644 --- a/examples/wchess/wchess.cmd/wchess.cmd.cpp +++ b/examples/wchess/wchess.cmd/wchess.cmd.cpp @@ -7,7 +7,7 @@ #include "WChess.h" #include "common-sdl.h" -#ifdef GGML_BINDINGS_FLAT +#ifdef BINDINGS_FLAT #include "whisper-flat.h" #endif #include @@ -185,7 +185,7 @@ int main(int argc, char ** argv) { // whisper init - #ifdef GGML_BINDINGS_FLAT + #ifdef BINDINGS_FLAT whisper_flat_backend_load_all(); #endif diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 45af1d3e959..3fd0d93ef5e 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -217,8 +217,8 @@ if(BINDINGS_FLAT) set(FLAT_GGML_SOURCES ggml-flat.cpp ) - list(APPEND GGML_LIBRARY_SOURCES ${FLAT_GGML_SOURCES}) + add_compile_options(-DGGML_BINDINGS_FLAT) endif() add_library(ggml diff --git a/ggml/src/ggml-flat.cpp b/ggml/src/ggml-flat.cpp index ea727ecbec9..c39b4a92716 100644 --- a/ggml/src/ggml-flat.cpp +++ b/ggml/src/ggml-flat.cpp @@ -29,6 +29,10 @@ #include "ggml-flat.h" +#ifdef GGML_BINDINGS_FLAT + void ggml_flat_test(void) { + } +#endif diff --git a/ggml/src/ggml-flat.h b/ggml/src/ggml-flat.h index 7537d7249f2..cacb68e7223 100644 --- a/ggml/src/ggml-flat.h +++ b/ggml/src/ggml-flat.h @@ -21,7 +21,9 @@ extern "C" { #endif - + #ifdef GGML_BINDINGS_FLAT + GGML_FLAT_API void ggml_flat_test(void); + #endif #ifdef __cplusplus } From 94faab8682704b16f77c53153e7e38ac46be20f8 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 30 Apr 2025 17:19:15 +0100 Subject: [PATCH 19/34] ggml_backend_try_load_best only in BINDINGS_FLAT --- ggml/include/ggml-backend.h | 1 - ggml/src/ggml-backend-reg.cpp | 8 +++++++- ggml/src/ggml-flat.cpp | 5 ++--- ggml/src/ggml-flat.h | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index b96a1f8579d..65b7c14d64e 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -349,7 +349,6 @@ extern "C" { GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void); - GGML_API ggml_backend_reg_t ggml_backend_try_load_best(const char * name, const char * dir_path); #ifdef __cplusplus } diff --git 
a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp index 66d3542cacc..4456e81f598 100644 --- a/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp @@ -24,6 +24,10 @@ # include #endif +#ifdef GGML_BINDINGS_FLAT +#include "ggml-flat.h" +#endif + // Backend registry #ifdef GGML_USE_CPU #include "ggml-cpu.h" @@ -585,8 +589,9 @@ void ggml_backend_load_all_from_path(const char * dir_path) { } } +#if defined(GGML_BINDINGS_FLAT) ggml_backend_reg_t ggml_backend_try_load_best(const char * name, const char * dir_path) { -#ifdef NDEBUG +#ifdef defined(NDEBUG) bool silent = true; #else bool silent = false; @@ -594,3 +599,4 @@ ggml_backend_reg_t ggml_backend_try_load_best(const char * name, const char * di fprintf(stderr, "Performing ggml_backend_try_load_best(%s)\n", name); return ggml_backend_load_best(name, silent, dir_path); } +#endif diff --git a/ggml/src/ggml-flat.cpp b/ggml/src/ggml-flat.cpp index c39b4a92716..3aa22e7ee3b 100644 --- a/ggml/src/ggml-flat.cpp +++ b/ggml/src/ggml-flat.cpp @@ -30,10 +30,9 @@ #include "ggml-flat.h" #ifdef GGML_BINDINGS_FLAT - void ggml_flat_test(void) { - } -#endif +// ggml_backend_reg_t ggml_backend_try_load_best(const char * name, const char * dir_path); +#endif diff --git a/ggml/src/ggml-flat.h b/ggml/src/ggml-flat.h index cacb68e7223..bb6bb1c055a 100644 --- a/ggml/src/ggml-flat.h +++ b/ggml/src/ggml-flat.h @@ -22,7 +22,7 @@ extern "C" { #endif #ifdef GGML_BINDINGS_FLAT - GGML_FLAT_API void ggml_flat_test(void); + GGML_FLAT_API ggml_backend_reg_t ggml_backend_try_load_best(const char * name, const char * dir_path); #endif #ifdef __cplusplus From 5a3b4673f79c4aef61ac82ba21b7b3d92b361f4d Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 30 Apr 2025 19:02:19 +0100 Subject: [PATCH 20/34] Disable breaking MSVC Warning CMakeFiles.txt if GGML_CPU_ALL_VARIANTS --- CMakeLists.txt | 57 ++++++++++++++++++++++++--------------------- ggml/CMakeLists.txt | 29 ++++++++++++----------- 2 files changed, 46 insertions(+), 40 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ead5136806e..2a6c02763c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -218,32 +218,35 @@ if (WHISPER_BUILD_EXAMPLES) add_subdirectory(examples) endif() -if (MSVC) - set(MSVC_WARNING_FLAGS - /wd4101 # Unreferenced local variable - /wd4005 # Macro redefinition - /wd4065 # switch statement contains 'default' but no 'case' labels - /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data - /wd4244 # Conversion from one type to another type, possible loss of ata - /wd4805 # Unsafe mix of type - /wd4305 # Truncation from 'type1' to 'type2' (often double to float) - /wd4996 # Function or variable may be unsafe/deprecated - ) - function(disable_msvc_warnings target_name) - target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS}) - endfunction() - - if (WHISPER_BUILD_EXAMPLES) - disable_msvc_warnings(common) - disable_msvc_warnings(common-sdl) - disable_msvc_warnings(lsp) - disable_msvc_warnings(wchess-core) - disable_msvc_warnings(whisper-command) - disable_msvc_warnings(whisper-cli) - disable_msvc_warnings(whisper-server) - disable_msvc_warnings(whisper-stream) - disable_msvc_warnings(whisper-talk-llama) - disable_msvc_warnings(whisper-bench) - disable_msvc_warnings(quantize) +### The following breaks GGML_CPU_ALL_VARIANTS ### +if(NOT DEFINED GGML_CPU_ALL_VARIANTS) + if (MSVC) + set(MSVC_WARNING_FLAGS + /wd4101 # Unreferenced local variable + /wd4005 # Macro redefinition + /wd4065 # switch statement contains 'default' but no 'case' labels + 
/wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data + /wd4244 # Conversion from one type to another type, possible loss of ata + /wd4805 # Unsafe mix of type + /wd4305 # Truncation from 'type1' to 'type2' (often double to float) + /wd4996 # Function or variable may be unsafe/deprecated + ) + function(disable_msvc_warnings target_name) + target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS}) + endfunction() + + if (WHISPER_BUILD_EXAMPLES) + disable_msvc_warnings(common) + disable_msvc_warnings(common-sdl) + disable_msvc_warnings(lsp) + disable_msvc_warnings(wchess-core) + disable_msvc_warnings(whisper-command) + disable_msvc_warnings(whisper-cli) + disable_msvc_warnings(whisper-server) + disable_msvc_warnings(whisper-stream) + disable_msvc_warnings(whisper-talk-llama) + disable_msvc_warnings(whisper-bench) + disable_msvc_warnings(quantize) + endif() endif() endif() diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index e632af010c7..703a2f694b9 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -361,17 +361,20 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml) -if (MSVC) - set(MSVC_WARNING_FLAGS - /wd4005 # Macro redefinition - /wd4244 # Conversion from one type to another type, possible loss of data - /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data - ) - function(disable_msvc_warnings target_name) - target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS}) - endfunction() - - disable_msvc_warnings(ggml-base) - disable_msvc_warnings(ggml) - disable_msvc_warnings(ggml-cpu) +### The following breaks GGML_CPU_ALL_VARIANTS ### +if(NOT DEFINED GGML_CPU_ALL_VARIANTS) + if (MSVC) + set(MSVC_WARNING_FLAGS + /wd4005 # Macro redefinition + /wd4244 # Conversion from one type to another type, possible loss of data + /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data + ) + function(disable_msvc_warnings target_name) + target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS}) + endfunction() + + disable_msvc_warnings(ggml-base) + disable_msvc_warnings(ggml) + disable_msvc_warnings(ggml-cpu) + endif() endif() From f65e8b1e1d2b064338adb5701e2b84dc65721a86 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 30 Apr 2025 22:41:56 +0100 Subject: [PATCH 21/34] Fix library load - update readme --- examples/bench/bench.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index 9547735127a..857e8b9fed2 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -1,6 +1,7 @@ #include "whisper.h" #ifdef BINDINGS_FLAT #include "whisper-flat.h" +#include "../ggml/src/ggml-flat.h" #endif #include @@ -65,8 +66,11 @@ static int whisper_bench_full(const whisper_params & params) { // whisper init #ifdef BINDINGS_FLAT - whisper_flat_backend_load_all(); - #endif + // whisper_flat_backend_load_all(); + ggml_backend_try_load_best("rpc", nullptr); + ggml_backend_try_load_best("blas", nullptr); + ggml_backend_try_load_best("cpu", nullptr); +#endif struct whisper_context_params cparams = whisper_context_default_params(); From d6c7a10da00c5c0c30a3691aadefe6bdf2af2544 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 30 Apr 2025 22:49:26 +0100 Subject: [PATCH 22/34] Oops - accidental commit - fixup --- examples/bench/bench.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git 
a/examples/bench/bench.cpp b/examples/bench/bench.cpp index 857e8b9fed2..a001f67a2da 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -66,11 +66,8 @@ static int whisper_bench_full(const whisper_params & params) { // whisper init #ifdef BINDINGS_FLAT - // whisper_flat_backend_load_all(); - ggml_backend_try_load_best("rpc", nullptr); - ggml_backend_try_load_best("blas", nullptr); - ggml_backend_try_load_best("cpu", nullptr); -#endif + whisper_flat_backend_load_all(); + #endif struct whisper_context_params cparams = whisper_context_default_params(); From dffd00b2a00fef033c0a61c69c1689909b46acf2 Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 30 Apr 2025 22:50:16 +0100 Subject: [PATCH 23/34] Oops - accidental commit - fixup2 --- examples/bench/bench.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index a001f67a2da..9547735127a 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -1,7 +1,6 @@ #include "whisper.h" #ifdef BINDINGS_FLAT #include "whisper-flat.h" -#include "../ggml/src/ggml-flat.h" #endif #include From 24a8551365c6d444e49fabb2fd178f1571024fe7 Mon Sep 17 00:00:00 2001 From: peardox Date: Fri, 2 May 2025 00:14:01 +0100 Subject: [PATCH 24/34] Mend broken ifdef (one of mine) --- ggml/src/ggml-backend-reg.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp index 4456e81f598..e2c7ea367d1 100644 --- a/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp @@ -589,13 +589,9 @@ void ggml_backend_load_all_from_path(const char * dir_path) { } } -#if defined(GGML_BINDINGS_FLAT) +#ifdef GGML_BINDINGS_FLAT ggml_backend_reg_t ggml_backend_try_load_best(const char * name, const char * dir_path) { -#ifdef defined(NDEBUG) bool silent = true; -#else - bool silent = false; -#endif fprintf(stderr, "Performing ggml_backend_try_load_best(%s)\n", name); return ggml_backend_load_best(name, silent, dir_path); } From 696f86c6406d13997fc81a0a7d9b2be9d18a87ac Mon Sep 17 00:00:00 2001 From: peardox Date: Fri, 2 May 2025 17:59:49 +0100 Subject: [PATCH 25/34] Commit resolution --- CMakeLists.txt | 57 +++++++++++++++++------------------ ggml/src/ggml-backend-reg.cpp | 3 ++ 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a6c02763c8..ead5136806e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -218,35 +218,32 @@ if (WHISPER_BUILD_EXAMPLES) add_subdirectory(examples) endif() -### The following breaks GGML_CPU_ALL_VARIANTS ### -if(NOT DEFINED GGML_CPU_ALL_VARIANTS) - if (MSVC) - set(MSVC_WARNING_FLAGS - /wd4101 # Unreferenced local variable - /wd4005 # Macro redefinition - /wd4065 # switch statement contains 'default' but no 'case' labels - /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data - /wd4244 # Conversion from one type to another type, possible loss of ata - /wd4805 # Unsafe mix of type - /wd4305 # Truncation from 'type1' to 'type2' (often double to float) - /wd4996 # Function or variable may be unsafe/deprecated - ) - function(disable_msvc_warnings target_name) - target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS}) - endfunction() - - if (WHISPER_BUILD_EXAMPLES) - disable_msvc_warnings(common) - disable_msvc_warnings(common-sdl) - disable_msvc_warnings(lsp) - disable_msvc_warnings(wchess-core) - disable_msvc_warnings(whisper-command) - disable_msvc_warnings(whisper-cli) - disable_msvc_warnings(whisper-server) - 
disable_msvc_warnings(whisper-stream) - disable_msvc_warnings(whisper-talk-llama) - disable_msvc_warnings(whisper-bench) - disable_msvc_warnings(quantize) - endif() +if (MSVC) + set(MSVC_WARNING_FLAGS + /wd4101 # Unreferenced local variable + /wd4005 # Macro redefinition + /wd4065 # switch statement contains 'default' but no 'case' labels + /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data + /wd4244 # Conversion from one type to another type, possible loss of ata + /wd4805 # Unsafe mix of type + /wd4305 # Truncation from 'type1' to 'type2' (often double to float) + /wd4996 # Function or variable may be unsafe/deprecated + ) + function(disable_msvc_warnings target_name) + target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS}) + endfunction() + + if (WHISPER_BUILD_EXAMPLES) + disable_msvc_warnings(common) + disable_msvc_warnings(common-sdl) + disable_msvc_warnings(lsp) + disable_msvc_warnings(wchess-core) + disable_msvc_warnings(whisper-command) + disable_msvc_warnings(whisper-cli) + disable_msvc_warnings(whisper-server) + disable_msvc_warnings(whisper-stream) + disable_msvc_warnings(whisper-talk-llama) + disable_msvc_warnings(whisper-bench) + disable_msvc_warnings(quantize) endif() endif() diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp index e2c7ea367d1..183c5d68eeb 100644 --- a/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp @@ -109,6 +109,9 @@ struct dl_handle_deleter { static dl_handle * dl_load_library(const fs::path & path) { // suppress error dialogs for missing DLLs + #ifdef GGML_BINDINGS_FLAT // sbdbg + fprintf(stderr, "dl_load_library()%s",path.wstring().c_str()); // sbdbg + #endif // sbdbg DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS); SetErrorMode(old_mode | SEM_FAILCRITICALERRORS); From f85b51eba43953fb8c8a12b04d9d06840d6aa648 Mon Sep 17 00:00:00 2001 From: peardox Date: Fri, 2 May 2025 18:05:09 +0100 Subject: [PATCH 26/34] Commit resolution --- CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ead5136806e..463009d2729 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -230,7 +230,9 @@ if (MSVC) /wd4996 # Function or variable may be unsafe/deprecated ) function(disable_msvc_warnings target_name) - target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS}) + if(TARGET ${target_name}) + target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS}) + endif() endfunction() if (WHISPER_BUILD_EXAMPLES) @@ -246,4 +248,4 @@ if (MSVC) disable_msvc_warnings(whisper-bench) disable_msvc_warnings(quantize) endif() -endif() +endif() \ No newline at end of file From db59f98df5670b726c041168ca19d92a44d1d12e Mon Sep 17 00:00:00 2001 From: peardox Date: Fri, 2 May 2025 18:19:42 +0100 Subject: [PATCH 27/34] Conflict resolution --- ggml/CMakeLists.txt | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index 703a2f694b9..e392f8091b6 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -362,19 +362,17 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml) ### The following breaks GGML_CPU_ALL_VARIANTS ### -if(NOT DEFINED GGML_CPU_ALL_VARIANTS) - if (MSVC) - set(MSVC_WARNING_FLAGS - /wd4005 # Macro redefinition - /wd4244 # Conversion from one type to another type, possible loss of data - /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data - ) - 
function(disable_msvc_warnings target_name) - target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS}) - endfunction() - - disable_msvc_warnings(ggml-base) - disable_msvc_warnings(ggml) - disable_msvc_warnings(ggml-cpu) - endif() +if (MSVC AND NOT DEFINED GGML_CPU_ALL_VARIANTS) + set(MSVC_WARNING_FLAGS + /wd4005 # Macro redefinition + /wd4244 # Conversion from one type to another type, possible loss of data + /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data + ) + function(disable_msvc_warnings target_name) + target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS}) + endfunction() + + disable_msvc_warnings(ggml-base) + disable_msvc_warnings(ggml) + disable_msvc_warnings(ggml-cpu) endif() From 0cc4f5eb31da338b42877183de46780cc6d659e9 Mon Sep 17 00:00:00 2001 From: peardox Date: Sat, 3 May 2025 02:56:13 +0100 Subject: [PATCH 28/34] Enable BINGINGS_FLAT in some examples --- examples/bench/bench.cpp | 8 +++++++- examples/cli/cli.cpp | 8 +++++++- examples/command/command.cpp | 8 +++++++- examples/server/server.cpp | 1 + examples/stream/stream.cpp | 8 +++++++- examples/talk-llama/talk-llama.cpp | 8 +++++++- ggml/src/ggml-backend-reg.cpp | 5 +---- ggml/src/ggml-flat.h | 1 + 8 files changed, 38 insertions(+), 9 deletions(-) diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index 9547735127a..1c90cb749fd 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -1,6 +1,7 @@ #include "whisper.h" #ifdef BINDINGS_FLAT #include "whisper-flat.h" +#include "../ggml/src/ggml-flat.h" #endif #include @@ -65,7 +66,12 @@ static int whisper_bench_full(const whisper_params & params) { // whisper init #ifdef BINDINGS_FLAT - whisper_flat_backend_load_all(); + fprintf(stderr, "+++ BINDINGS_FLAT +++\n"); + if(params.use_gpu) { + whisper_flat_backend_load_all(); + } else { + ggml_backend_try_load_best("cpu", nullptr); + } #endif struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp index 52459de4d38..368f4220987 100644 --- a/examples/cli/cli.cpp +++ b/examples/cli/cli.cpp @@ -4,6 +4,7 @@ #include "whisper.h" #ifdef BINDINGS_FLAT #include "whisper-flat.h" +#include "../ggml/src/ggml-flat.h" #endif #include "grammar-parser.h" @@ -1008,7 +1009,12 @@ int main(int argc, char ** argv) { // whisper init #ifdef BINDINGS_FLAT - whisper_flat_backend_load_all(); + fprintf(stderr, "+++ BINDINGS_FLAT +++\n"); + if(params.use_gpu) { + whisper_flat_backend_load_all(); + } else { + ggml_backend_try_load_best("cpu", nullptr); + } #endif struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/examples/command/command.cpp b/examples/command/command.cpp index 33a4c289d9c..a9ed13acbdd 100644 --- a/examples/command/command.cpp +++ b/examples/command/command.cpp @@ -11,6 +11,7 @@ #include "whisper.h" #ifdef BINDINGS_FLAT #include "whisper-flat.h" +#include "../ggml/src/ggml-flat.h" #endif #include "grammar-parser.h" @@ -696,7 +697,12 @@ int main(int argc, char ** argv) { // whisper init #ifdef BINDINGS_FLAT - whisper_flat_backend_load_all(); + fprintf(stderr, "+++ BINDINGS_FLAT +++\n"); + if(params.use_gpu) { + whisper_flat_backend_load_all(); + } else { + ggml_backend_try_load_best("cpu", nullptr); + } #endif struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/examples/server/server.cpp b/examples/server/server.cpp index d9d96a4c813..c1100c39ace 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ 
-549,6 +549,7 @@ int main(int argc, char ** argv) { } // whisper init #ifdef BINDINGS_FLAT + fprintf(stderr, "+++ BINDINGS_FLAT +++\n"); whisper_flat_backend_load_all(); #endif diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index 085a44fd9ef..198e509725f 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -8,6 +8,7 @@ #include "whisper.h" #ifdef BINDINGS_FLAT #include "whisper-flat.h" +#include "../ggml/src/ggml-flat.h" #endif #include @@ -159,7 +160,12 @@ int main(int argc, char ** argv) { } #ifdef BINDINGS_FLAT - whisper_flat_backend_load_all(); + fprintf(stderr, "+++ BINDINGS_FLAT +++\n"); + if(params.use_gpu) { + whisper_flat_backend_load_all(); + } else { + ggml_backend_try_load_best("cpu", nullptr); + } #endif struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/examples/talk-llama/talk-llama.cpp b/examples/talk-llama/talk-llama.cpp index 21d3a950a8a..8bfdd6f63d1 100644 --- a/examples/talk-llama/talk-llama.cpp +++ b/examples/talk-llama/talk-llama.cpp @@ -7,6 +7,7 @@ #include "whisper.h" #ifdef BINDINGS_FLAT #include "whisper-flat.h" +#include "../ggml/src/ggml-flat.h" #endif #include "llama.h" @@ -291,7 +292,12 @@ int main(int argc, char ** argv) { // whisper init #ifdef BINDINGS_FLAT - whisper_flat_backend_load_all(); + fprintf(stderr, "+++ BINDINGS_FLAT +++\n"); + if(params.use_gpu) { + whisper_flat_backend_load_all(); + } else { + ggml_backend_try_load_best("cpu", nullptr); + } #endif struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp index 183c5d68eeb..6ac3998f726 100644 --- a/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp @@ -109,9 +109,6 @@ struct dl_handle_deleter { static dl_handle * dl_load_library(const fs::path & path) { // suppress error dialogs for missing DLLs - #ifdef GGML_BINDINGS_FLAT // sbdbg - fprintf(stderr, "dl_load_library()%s",path.wstring().c_str()); // sbdbg - #endif // sbdbg DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS); SetErrorMode(old_mode | SEM_FAILCRITICALERRORS); @@ -595,7 +592,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) { #ifdef GGML_BINDINGS_FLAT ggml_backend_reg_t ggml_backend_try_load_best(const char * name, const char * dir_path) { bool silent = true; - fprintf(stderr, "Performing ggml_backend_try_load_best(%s)\n", name); + fprintf(stderr, "%s: (%s)\n", __func__, name); return ggml_backend_load_best(name, silent, dir_path); } #endif diff --git a/ggml/src/ggml-flat.h b/ggml/src/ggml-flat.h index bb6bb1c055a..6e3942ec171 100644 --- a/ggml/src/ggml-flat.h +++ b/ggml/src/ggml-flat.h @@ -1,6 +1,7 @@ #pragma once #ifdef BINDINGS_FLAT +#define GGML_BINDINGS_FLAT #endif #ifdef GGML_SHARED From 9a690c543fc621e0227219ca9b4a597873b0602a Mon Sep 17 00:00:00 2001 From: peardox Date: Mon, 5 May 2025 12:43:57 +0100 Subject: [PATCH 29/34] Remove 'MSVC AND NOT DEFINED GGML_CPU_ALL_VARIANTS' in ggml cmake --- ggml/CMakeLists.txt | 49 +++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index ed45daccefb..1c552a4bc07 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -361,29 +361,26 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml) -### The following breaks GGML_CPU_ALL_VARIANTS ### -if (MSVC AND NOT DEFINED GGML_CPU_ALL_VARIANTS) 
- set(MSVC_WARNING_FLAGS - /wd4005 # Macro redefinition - /wd4244 # Conversion from one type to another type, possible loss of data - /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data - /wd4996 # Disable POSIX deprecation warnings - /wd4702 # Unreachable code warnings - ) - function(disable_msvc_warnings target_name) - if(TARGET ${target_name}) - target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS}) - endif() - endfunction() - - disable_msvc_warnings(ggml-base) - disable_msvc_warnings(ggml) - disable_msvc_warnings(ggml-cpu) - disable_msvc_warnings(ggml-cpu-x64) - disable_msvc_warnings(ggml-cpu-sse42) - disable_msvc_warnings(ggml-cpu-sandybridge) - disable_msvc_warnings(ggml-cpu-haswell) - disable_msvc_warnings(ggml-cpu-skylakex) - disable_msvc_warnings(ggml-cpu-icelake) - disable_msvc_warnings(ggml-cpu-alderlake) -endif() +set(MSVC_WARNING_FLAGS + /wd4005 # Macro redefinition + /wd4244 # Conversion from one type to another type, possible loss of data + /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data + /wd4996 # Disable POSIX deprecation warnings + /wd4702 # Unreachable code warnings +) +function(disable_msvc_warnings target_name) + if(TARGET ${target_name}) + target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS}) + endif() +endfunction() + +disable_msvc_warnings(ggml-base) +disable_msvc_warnings(ggml) +disable_msvc_warnings(ggml-cpu) +disable_msvc_warnings(ggml-cpu-x64) +disable_msvc_warnings(ggml-cpu-sse42) +disable_msvc_warnings(ggml-cpu-sandybridge) +disable_msvc_warnings(ggml-cpu-haswell) +disable_msvc_warnings(ggml-cpu-skylakex) +disable_msvc_warnings(ggml-cpu-icelake) +disable_msvc_warnings(ggml-cpu-alderlake) From 420af26b3c7ff9fa6467f4cbc6fde0e311ecdcbf Mon Sep 17 00:00:00 2001 From: peardox Date: Mon, 5 May 2025 13:02:34 +0100 Subject: [PATCH 30/34] Replace new MSVC_WARNING_FLAGS with if msvc --- ggml/CMakeLists.txt | 48 +++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index 1c552a4bc07..3ace673afd9 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -361,26 +361,28 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml) -set(MSVC_WARNING_FLAGS - /wd4005 # Macro redefinition - /wd4244 # Conversion from one type to another type, possible loss of data - /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data - /wd4996 # Disable POSIX deprecation warnings - /wd4702 # Unreachable code warnings -) -function(disable_msvc_warnings target_name) - if(TARGET ${target_name}) - target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS}) - endif() -endfunction() - -disable_msvc_warnings(ggml-base) -disable_msvc_warnings(ggml) -disable_msvc_warnings(ggml-cpu) -disable_msvc_warnings(ggml-cpu-x64) -disable_msvc_warnings(ggml-cpu-sse42) -disable_msvc_warnings(ggml-cpu-sandybridge) -disable_msvc_warnings(ggml-cpu-haswell) -disable_msvc_warnings(ggml-cpu-skylakex) -disable_msvc_warnings(ggml-cpu-icelake) -disable_msvc_warnings(ggml-cpu-alderlake) +if (MSVC) + set(MSVC_WARNING_FLAGS + /wd4005 # Macro redefinition + /wd4244 # Conversion from one type to another type, possible loss of data + /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data + /wd4996 # Disable POSIX deprecation warnings + /wd4702 # Unreachable code warnings + ) + 
function(disable_msvc_warnings target_name) + if(TARGET ${target_name}) + target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS}) + endif() + endfunction() + + disable_msvc_warnings(ggml-base) + disable_msvc_warnings(ggml) + disable_msvc_warnings(ggml-cpu) + disable_msvc_warnings(ggml-cpu-x64) + disable_msvc_warnings(ggml-cpu-sse42) + disable_msvc_warnings(ggml-cpu-sandybridge) + disable_msvc_warnings(ggml-cpu-haswell) + disable_msvc_warnings(ggml-cpu-skylakex) + disable_msvc_warnings(ggml-cpu-icelake) + disable_msvc_warnings(ggml-cpu-alderlake) +endif() \ No newline at end of file From c71ced4dc661ce9562af3696d9320b03afa2a078 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Mon, 5 May 2025 21:59:23 +0900 Subject: [PATCH 31/34] Add BINDINGS_FLAT and GGML_BINDINGS_FLAT to Ruby install options --- bindings/ruby/ext/options.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bindings/ruby/ext/options.rb b/bindings/ruby/ext/options.rb index 6fed3184059..4c5ee7b9d11 100644 --- a/bindings/ruby/ext/options.rb +++ b/bindings/ruby/ext/options.rb @@ -61,6 +61,7 @@ def extra_options def configure filepath "ACCELERATE_FRAMEWORK" + ignored "BINDINGS_FLAT" ignored "BUILD_SHARED_LIBS" ignored "BUILD_TESTING" ignored "CMAKE_BUILD_TYPE" @@ -83,6 +84,7 @@ def configure bool "GGML_AVX_VNNI" ignored "GGML_BACKEND_DL" ignored "GGML_BIN_INSTALL_DIR" + ignored "GGML_BINDINGS_FLAT" bool "GGML_BLAS" string "GGML_BLAS_VENDOR" bool "GGML_BMI2" From bdf03cfd7adea5f80d0a7f338efd4d180406dd5c Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 7 May 2025 08:43:00 +0100 Subject: [PATCH 32/34] Updates as requested --- CMakeLists.txt | 4 +- bindings/ruby/ext/options.rb | 2 +- examples/CMakeLists.txt | 4 +- examples/bench/bench.cpp | 6 +- examples/cli/cli.cpp | 6 +- examples/command/command.cpp | 6 +- examples/server/server.cpp | 12 +- examples/stream/stream.cpp | 6 +- examples/talk-llama/talk-llama.cpp | 6 +- examples/wchess/wchess.cmd/wchess.cmd.cpp | 11 +- ggml/include/ggml-backend.h | 1 - ggml/include/ggml-backend.h.bak | 355 ++++++++++++++++++++++ ggml/src/CMakeLists.txt | 2 +- ggml/src/ggml-flat.h | 2 +- include/whisper.h | 2 +- src/CMakeLists.txt | 4 +- src/whisper-flat.cpp | 28 +- src/whisper-flat.h | 2 +- src/whisper.cpp | 21 +- 19 files changed, 414 insertions(+), 66 deletions(-) create mode 100644 ggml/include/ggml-backend.h.bak diff --git a/CMakeLists.txt b/CMakeLists.txt index 95374a106f9..21b664dcc76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,8 +71,8 @@ option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF) option(WHISPER_USE_SYSTEM_GGML "whisper: use system-installed GGML library" OFF) # flat bindings -option(BINDINGS_FLAT "Add extra flat definitions to Whisper + GGML" OFF) -option(GGML_BINDINGS_FLAT "Add extra flat definitions to Examples" ${BINDINGS_FLAT}) +option(WHISPER_BINDINGS_FLAT "Add extra flat definitions to Whisper + GGML" OFF) +option(GGML_BINDINGS_FLAT "Add extra flat definitions to Examples" ${WHISPER_BINDINGS_FLAT}) # sanitizers option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF) diff --git a/bindings/ruby/ext/options.rb b/bindings/ruby/ext/options.rb index 4c5ee7b9d11..b0def8a51ea 100644 --- a/bindings/ruby/ext/options.rb +++ b/bindings/ruby/ext/options.rb @@ -61,7 +61,6 @@ def extra_options def configure filepath "ACCELERATE_FRAMEWORK" - ignored "BINDINGS_FLAT" ignored "BUILD_SHARED_LIBS" ignored "BUILD_TESTING" ignored "CMAKE_BUILD_TYPE" @@ -167,6 +166,7 @@ def configure bool "WHISPER_ALL_WARNINGS" bool 
"WHISPER_ALL_WARNINGS_3RD_PARTY" ignored "WHISPER_BIN_INSTALL_DIR" + ignored "WHISPER_BINDINGS_FLAT" ignored "WHISPER_BUILD_EXAMPLES" ignored "WHISPER_BUILD_SERVER" ignored"WHISPER_BUILD_TESTS" diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 0adebb5b6ab..950bf1734ac 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -91,9 +91,9 @@ target_include_directories(json_cpp INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) # examples -if(BINDINGS_FLAT) +if(WHISPER_BINDINGS_FLAT) message(STATUS "Adding FLAT binding to examples") - add_compile_options(-DBINDINGS_FLAT) + add_compile_options(-DWHISPER_BINDINGS_FLAT) endif() diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp index 1c90cb749fd..16b1ce975e3 100644 --- a/examples/bench/bench.cpp +++ b/examples/bench/bench.cpp @@ -1,5 +1,5 @@ #include "whisper.h" -#ifdef BINDINGS_FLAT +#ifdef WHISPER_BINDINGS_FLAT #include "whisper-flat.h" #include "../ggml/src/ggml-flat.h" #endif @@ -65,8 +65,8 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para static int whisper_bench_full(const whisper_params & params) { // whisper init - #ifdef BINDINGS_FLAT - fprintf(stderr, "+++ BINDINGS_FLAT +++\n"); + #ifdef WHISPER_BINDINGS_FLAT + fprintf(stderr, "+++ WHISPER_BINDINGS_FLAT +++\n"); if(params.use_gpu) { whisper_flat_backend_load_all(); } else { diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp index 53a3161380e..ff814b5aadf 100644 --- a/examples/cli/cli.cpp +++ b/examples/cli/cli.cpp @@ -2,7 +2,7 @@ #include "common-whisper.h" #include "whisper.h" -#ifdef BINDINGS_FLAT +#ifdef WHISPER_BINDINGS_FLAT #include "whisper-flat.h" #include "../ggml/src/ggml-flat.h" #endif @@ -939,8 +939,8 @@ int main(int argc, char ** argv) { // whisper init - #ifdef BINDINGS_FLAT - fprintf(stderr, "+++ BINDINGS_FLAT +++\n"); + #ifdef WHISPER_BINDINGS_FLAT + fprintf(stderr, "+++ WHISPER_BINDINGS_FLAT +++\n"); if(params.use_gpu) { whisper_flat_backend_load_all(); } else { diff --git a/examples/command/command.cpp b/examples/command/command.cpp index a9ed13acbdd..f3b59c241dd 100644 --- a/examples/command/command.cpp +++ b/examples/command/command.cpp @@ -9,7 +9,7 @@ #include "common-sdl.h" #include "common.h" #include "whisper.h" -#ifdef BINDINGS_FLAT +#ifdef WHISPER_BINDINGS_FLAT #include "whisper-flat.h" #include "../ggml/src/ggml-flat.h" #endif @@ -696,8 +696,8 @@ int main(int argc, char ** argv) { // whisper init - #ifdef BINDINGS_FLAT - fprintf(stderr, "+++ BINDINGS_FLAT +++\n"); + #ifdef WHISPER_BINDINGS_FLAT + fprintf(stderr, "+++ WHISPER_BINDINGS_FLAT +++\n"); if(params.use_gpu) { whisper_flat_backend_load_all(); } else { diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 6cc83ae6e16..2b49512a49b 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2,7 +2,7 @@ #include "common-whisper.h" #include "whisper.h" -#ifdef BINDINGS_FLAT +#ifdef WHISPER_BINDINGS_FLAT #include "whisper-flat.h" #endif #include "httplib.h" @@ -544,9 +544,13 @@ int main(int argc, char ** argv) { check_ffmpeg_availibility(); } // whisper init - #ifdef BINDINGS_FLAT - fprintf(stderr, "+++ BINDINGS_FLAT +++\n"); - whisper_flat_backend_load_all(); + #ifdef WHISPER_BINDINGS_FLAT + fprintf(stderr, "+++ WHISPER_BINDINGS_FLAT +++\n"); + if(params.use_gpu) { + whisper_flat_backend_load_all(); + } else { + ggml_backend_try_load_best("cpu", nullptr); + } #endif struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/examples/stream/stream.cpp 
b/examples/stream/stream.cpp index 198e509725f..84afcdf4925 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -6,7 +6,7 @@ #include "common.h" #include "common-whisper.h" #include "whisper.h" -#ifdef BINDINGS_FLAT +#ifdef WHISPER_BINDINGS_FLAT #include "whisper-flat.h" #include "../ggml/src/ggml-flat.h" #endif @@ -159,8 +159,8 @@ int main(int argc, char ** argv) { exit(0); } - #ifdef BINDINGS_FLAT - fprintf(stderr, "+++ BINDINGS_FLAT +++\n"); + #ifdef WHISPER_BINDINGS_FLAT + fprintf(stderr, "+++ WHISPER_BINDINGS_FLAT +++\n"); if(params.use_gpu) { whisper_flat_backend_load_all(); } else { diff --git a/examples/talk-llama/talk-llama.cpp b/examples/talk-llama/talk-llama.cpp index 8bfdd6f63d1..e33816aff5b 100644 --- a/examples/talk-llama/talk-llama.cpp +++ b/examples/talk-llama/talk-llama.cpp @@ -5,7 +5,7 @@ #include "common.h" #include "common-whisper.h" #include "whisper.h" -#ifdef BINDINGS_FLAT +#ifdef WHISPER_BINDINGS_FLAT #include "whisper-flat.h" #include "../ggml/src/ggml-flat.h" #endif @@ -291,8 +291,8 @@ int main(int argc, char ** argv) { // whisper init - #ifdef BINDINGS_FLAT - fprintf(stderr, "+++ BINDINGS_FLAT +++\n"); + #ifdef WHISPER_BINDINGS_FLAT + fprintf(stderr, "+++ WHISPER_BINDINGS_FLAT +++\n"); if(params.use_gpu) { whisper_flat_backend_load_all(); } else { diff --git a/examples/wchess/wchess.cmd/wchess.cmd.cpp b/examples/wchess/wchess.cmd/wchess.cmd.cpp index a1a154a9dcc..015cc77bd9e 100644 --- a/examples/wchess/wchess.cmd/wchess.cmd.cpp +++ b/examples/wchess/wchess.cmd/wchess.cmd.cpp @@ -7,7 +7,7 @@ #include "WChess.h" #include "common-sdl.h" -#ifdef BINDINGS_FLAT +#ifdef WHISPER_BINDINGS_FLAT #include "whisper-flat.h" #endif #include @@ -185,8 +185,13 @@ int main(int argc, char ** argv) { // whisper init - #ifdef BINDINGS_FLAT - whisper_flat_backend_load_all(); + #ifdef WHISPER_BINDINGS_FLAT + fprintf(stderr, "+++ WHISPER_BINDINGS_FLAT +++\n"); + if(params.use_gpu) { + whisper_flat_backend_load_all(); + } else { + ggml_backend_try_load_best("cpu", nullptr); + } #endif struct whisper_context_params cparams = whisper_context_default_params(); diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index 65b7c14d64e..64671495b38 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -348,7 +348,6 @@ extern "C" { // CPU buffer types are always available GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void); - #ifdef __cplusplus } diff --git a/ggml/include/ggml-backend.h.bak b/ggml/include/ggml-backend.h.bak new file mode 100644 index 00000000000..65b7c14d64e --- /dev/null +++ b/ggml/include/ggml-backend.h.bak @@ -0,0 +1,355 @@ +#pragma once + +#include "ggml.h" +#include "ggml-alloc.h" + +#ifdef GGML_BACKEND_SHARED +# if defined(_WIN32) && !defined(__MINGW32__) +# ifdef GGML_BACKEND_BUILD +# define GGML_BACKEND_API __declspec(dllexport) extern +# else +# define GGML_BACKEND_API __declspec(dllimport) extern +# endif +# else +# define GGML_BACKEND_API __attribute__ ((visibility ("default"))) extern +# endif +#else +# define GGML_BACKEND_API extern +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t; + typedef struct ggml_backend_buffer * ggml_backend_buffer_t; + typedef struct ggml_backend_event * ggml_backend_event_t; + typedef struct ggml_backend * ggml_backend_t; + typedef void * ggml_backend_graph_plan_t; + typedef struct 
ggml_backend_reg * ggml_backend_reg_t; + typedef struct ggml_backend_device * ggml_backend_dev_t; + + + // + // Backend buffer type + // + + GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft); + GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size); + GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft); + GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft); + GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor); + GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft); + GGML_API ggml_backend_dev_t ggml_backend_buft_get_device (ggml_backend_buffer_type_t buft); + + // + // Backend buffer + // + + enum ggml_backend_buffer_usage { + GGML_BACKEND_BUFFER_USAGE_ANY = 0, + GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1, + GGML_BACKEND_BUFFER_USAGE_COMPUTE = 2, + }; + + GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer); + GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer); + GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer); + GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer); + GGML_API enum ggml_status ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); + GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer); + GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer); + GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); + GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value); + GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer); + GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage); + GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage (ggml_backend_buffer_t buffer); + GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer); + GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer); + + // tensor copy between different backends + GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst); + + // + // Backend (stream) + // + + GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend); + GGML_API const char * ggml_backend_name(ggml_backend_t backend); + GGML_API void ggml_backend_free(ggml_backend_t backend); + + GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend); + GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size); + GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend); + GGML_API size_t ggml_backend_get_max_size(ggml_backend_t backend); + + GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); + GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); + + // "offset" refers to the offset in tensor->data for setting/getting data + GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); + GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, 
size_t size); + GGML_API void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size); + + GGML_API void ggml_backend_synchronize(ggml_backend_t backend); + + GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph); + GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan); + + GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan); + GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph); + GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph); + + // NOTE: will be removed, use device version instead + GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op); + GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft); + GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op); + + // asynchronous copy + // the copy is performed after all the currently queued operations in backend_src + // backend_dst will wait for the copy to complete before performing other operations + // automatic fallback to sync copy if async is not supported + GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst); + + GGML_API ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend); + + // + // Events + // + + GGML_API ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device); + GGML_API void ggml_backend_event_free(ggml_backend_event_t event); + GGML_API void ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend); + GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event); + GGML_API void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event); + + // + // Backend device + // + + enum ggml_backend_dev_type { + // CPU device using system memory + GGML_BACKEND_DEVICE_TYPE_CPU, + // GPU device using dedicated memory + GGML_BACKEND_DEVICE_TYPE_GPU, + // accelerator devices intended to be used together with the CPU backend (e.g. 
BLAS or AMX) + GGML_BACKEND_DEVICE_TYPE_ACCEL + }; + + // functionality supported by the device + struct ggml_backend_dev_caps { + // asynchronous operations + bool async; + // pinned host buffer + bool host_buffer; + // creating buffers from host ptr + bool buffer_from_host_ptr; + // event synchronization + bool events; + }; + + // all the device properties + struct ggml_backend_dev_props { + const char * name; + const char * description; + size_t memory_free; + size_t memory_total; + enum ggml_backend_dev_type type; + struct ggml_backend_dev_caps caps; + }; + + GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device); + GGML_API const char * ggml_backend_dev_description(ggml_backend_dev_t device); + GGML_API void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total); + GGML_API enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device); + GGML_API void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props); + GGML_API ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device); + GGML_API ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params); + GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device); + GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device); + GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size); + + GGML_API bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op); + GGML_API bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft); + GGML_API bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op); + + // + // Backend (reg) + // + + GGML_API const char * ggml_backend_reg_name(ggml_backend_reg_t reg); + GGML_API size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg); + GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index); + GGML_API void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name); + + // Common functions that may be obtained using ggml_backend_reg_get_proc_address + + // Split buffer type for tensor parallelism + typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(int main_device, const float * tensor_split); + // Set the number of threads for the backend + typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads); + // Get additional buffer types provided by the device (returns a NULL-terminated array) + typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device); + // Set the abort callback for the backend + typedef void (*ggml_backend_set_abort_callback_t)(ggml_backend_t backend, ggml_abort_callback abort_callback, void * abort_callback_data); + // Get a list of feature flags supported by the backend (returns a NULL-terminated array) + struct ggml_backend_feature { + const char * name; + const char * value; + }; + typedef struct ggml_backend_feature * (*ggml_backend_get_features_t)(ggml_backend_reg_t reg); + + // + // Backend registry + // + + GGML_API void ggml_backend_device_register(ggml_backend_dev_t device); + + // Backend (reg) enumeration + GGML_API size_t ggml_backend_reg_count(void); + GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index); + GGML_API ggml_backend_reg_t 
ggml_backend_reg_by_name(const char * name); + + // Device enumeration + GGML_API size_t ggml_backend_dev_count(void); + GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index); + GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name); + GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type); + + // Direct backend (stream) initialization + // = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params) + GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params); + // = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params) + GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params); + // = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL) + GGML_API ggml_backend_t ggml_backend_init_best(void); + + // Load a backend from a dynamic library and register it + GGML_API ggml_backend_reg_t ggml_backend_load(const char * path); + // Unload a backend if loaded dynamically and unregister it + GGML_API void ggml_backend_unload(ggml_backend_reg_t reg); + // Load all known backends from dynamic libraries + GGML_API void ggml_backend_load_all(void); + GGML_API void ggml_backend_load_all_from_path(const char * dir_path); + + // + // Backend scheduler + // + + // The backend scheduler allows for multiple backend devices to be used together + // Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends + // The backends are selected based on: + // - the backend that supports the operation + // - the location of the pre-allocated tensors (e.g. the weights) + /* + Example usage: + + // operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned + // preferrably to run on the same backend as the buffer + ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS); + + sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false); + + // initialize buffers from a max size graph (optional) + reserve_graph = build_graph(sched, max_batch_size); + + // manually assign nodes to a backend (optional, should not be needed in most cases) + struct ggml_tensor * node = ggml_mul_mat(ctx, ...); + ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu); + + ggml_backend_sched_reserve(sched, reserve_graph); + + // compute + graph = build_graph(sched); // the graph and its tensors are single-use in terms of allocation, multi-use in terms of computation + for (int i = 0; i < 10; ++i) { + ggml_backend_sched_graph_compute(sched, graph); // on the first iteration the graph is allocated automatically + } + + // if there are graph inputs: + graph = build_graph(sched); // get a new graph that is not allocated (the metadata for the old graph is freed once ggml_free is called) + ggml_backend_sched_reset(sched); // clear the allocation of the previous graph + ggml_backend_sched_alloc_graph(sched, graph); // explicitly allocate the new graph but do not execute it + ggml_backend_tensor_set(input_tensor, ...); // copy data to the newly allocated graph tensors + ggml_backend_sched_graph_compute(sched, graph); // execute the graph + + // as an alternative to the above it is also possible to assign the inputs to a dedicated context and + // allocate them statically via ggml_backend_alloc_ctx_tensors + } + */ + + typedef struct ggml_backend_sched * ggml_backend_sched_t; + + // Evaluation callback 
for each node in the graph (set with ggml_backend_sched_set_eval_callback) + // when ask == true, the scheduler wants to know if the user wants to observe this node + // this allows the scheduler to batch nodes together in order to evaluate them in a single call + // + // when ask == false, the scheduler is passing the node tensor to the user for observation + // if the user returns false, the scheduler will cancel the graph compute + // + typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data); + + // Initialize a backend scheduler, backends with low index are given priority over backends with high index + GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel); + GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched); + + // Initialize backend buffers from a measure graph + GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); // returns success + + GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched); + GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i); + + // Get the number of splits of the last graph + GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched); + GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched); + + GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend); + + GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend); + GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node); + + // Allocate and compute graph on the backend scheduler + GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); // returns success + GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph); + GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph); + GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched); + + // Reset all assignments and allocators - must be called before changing the node backends or allocating a new graph. + // This in effect deallocates all tensors that were previously allocated and leaves them with dangling pointers. + // The correct way to use this API is to discard the deallocated tensors and create new ones. 
+ GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched); + + // Set a callback to be called for each resulting node during graph compute + GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data); + + // + // Utils + // + + struct ggml_backend_graph_copy { + ggml_backend_buffer_t buffer; + struct ggml_context * ctx_allocated; + struct ggml_context * ctx_unallocated; + struct ggml_cgraph * graph; + }; + + // Copy a graph to a different backend + GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph); + GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy); + + typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data); + + // Compare the output of two backends + GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data); + + // Tensor initialization + GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr); + GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor); + + // CPU buffer types are always available + GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); + GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void); + + +#ifdef __cplusplus +} +#endif diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 5a7c5954027..96d4e65ed51 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -211,7 +211,7 @@ endif() set(GGML_LIBRARY_SOURCES ggml-backend-reg.cpp) -if(BINDINGS_FLAT) +if(WHISPER_BINDINGS_FLAT) message(STATUS "Adding FLAT GGML binding extras") set(FLAT_GGML_SOURCES diff --git a/ggml/src/ggml-flat.h b/ggml/src/ggml-flat.h index 6e3942ec171..81dccbf25ec 100644 --- a/ggml/src/ggml-flat.h +++ b/ggml/src/ggml-flat.h @@ -1,6 +1,6 @@ #pragma once -#ifdef BINDINGS_FLAT +#ifdef WHISPER_BINDINGS_FLAT #define GGML_BINDINGS_FLAT #endif diff --git a/include/whisper.h b/include/whisper.h index bcd448b8240..455f572af4a 100644 --- a/include/whisper.h +++ b/include/whisper.h @@ -670,7 +670,7 @@ extern "C" { WHISPER_API float whisper_full_get_segment_no_speech_prob_from_state(struct whisper_state * state, int i_segment); // For whisper-flat.cpp to expose - #ifdef BINDINGS_FLAT + #ifdef WHISPER_BINDINGS_FLAT struct whisper_activity { float sample_ms; float encode_ms; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2509517ce99..46309cb0b25 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -106,7 +106,7 @@ set(WHISPER_LIBRARY_SOURCES whisper.cpp ) -if(BINDINGS_FLAT) +if(WHISPER_BINDINGS_FLAT) message(STATUS "Adding FLAT Whisper binding extras") set(FLAT_WHISPER_SOURCES @@ -114,7 +114,7 @@ if(BINDINGS_FLAT) ) list(APPEND WHISPER_LIBRARY_SOURCES ${FLAT_WHISPER_SOURCES}) - set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DBINDINGS_FLAT) + set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_BINDINGS_FLAT) endif() add_library(whisper diff --git a/src/whisper-flat.cpp b/src/whisper-flat.cpp index 9b8474af9f2..12a21328588 100644 --- a/src/whisper-flat.cpp +++ b/src/whisper-flat.cpp @@ -1,35 +1,9 @@ #include "whisper.h" -#include "whisper-arch.h" - -#include "ggml.h" -#include "ggml-cpp.h" -#include "ggml-alloc.h" #include "ggml-backend.h" -#include 
-#include -#include -#define _USE_MATH_DEFINES -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - #include "whisper-flat.h" -#ifdef BINDINGS_FLAT +#ifdef WHISPER_BINDINGS_FLAT void whisper_flat_backend_load_all(void) { ggml_backend_load_all(); } diff --git a/src/whisper-flat.h b/src/whisper-flat.h index 39429405211..6998b6deb3c 100644 --- a/src/whisper-flat.h +++ b/src/whisper-flat.h @@ -20,7 +20,7 @@ extern "C" { #endif - #ifdef BINDINGS_FLAT + #ifdef WHISPER_BINDINGS_FLAT WHISPER_FLAT_API void whisper_flat_backend_load_all(void); WHISPER_FLAT_API struct whisper_activity * whisper_flat_get_activity_with_state(struct whisper_state * state); WHISPER_FLAT_API struct whisper_state * whisper_flat_get_state_from_context(struct whisper_context * ctx); diff --git a/src/whisper.cpp b/src/whisper.cpp index 6b38d34b4d8..ba009c3f35f 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -204,7 +204,7 @@ static bool ggml_graph_compute_helper( return t; } -#ifndef BINDINGS_FLAT +#ifndef WHISPER_BINDINGS_FLAT static void whisper_load_backends() { static std::once_flag flag; std::call_once(flag, []() { @@ -1309,7 +1309,7 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) { static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) { ggml_log_set(g_state.log_callback, g_state.log_callback_user_data); - #ifndef BINDINGS_FLAT + #ifndef WHISPER_BINDINGS_FLAT whisper_load_backends(); #endif @@ -4328,7 +4328,7 @@ static int whisper_has_openvino(void) { const char * whisper_print_system_info(void) { static std::string s; - #ifndef BINDINGS_FLAT + #ifndef WHISPER_BINDINGS_FLAT whisper_load_backends(); #endif @@ -6785,7 +6785,7 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) { } WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) { - #ifndef BINDINGS_FLAT + #ifndef WHISPER_BINDINGS_FLAT whisper_load_backends(); #endif @@ -7562,7 +7562,15 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text fflush(stderr); } -#ifdef BINDINGS_FLAT +#ifdef WHISPER_BINDINGS_FLAT +// The optional WHISPER_BINDINGS_FLAT code is of limited use for most +// developers. The intended audience is those who are binding to +// another language. +// C++ specific constructs such as, but not limited to, std:vector +// are used frequently but unavailable to non-C++ developers. +// As such it is placed at the end of source in recognition of its +// limited appeal + // whisper_get_system_info_json // Returns system info as json, useful for language bindings // NOTE : While testing features->value always returned an int. @@ -7614,6 +7622,9 @@ const char * whisper_get_system_info_json(void) { // Returns state from supplied context pointer // This is mainly a helper for non-C++ language bindings as whisper_context // has embedded C++ specific types (e.g. 
maps and vectors) +// The returned whisper_state value can be treated as a an opaque object +// that need merely be 'plugged-in' to the following and other existing +// functions to obtain relevant information or functionality struct whisper_state * whisper_get_state_from_context(struct whisper_context * ctx) { if (!ctx->state) { return nullptr; From 85cf9ccd8743669673737a77aa27487c978bce9b Mon Sep 17 00:00:00 2001 From: peardox Date: Wed, 7 May 2025 10:13:07 +0100 Subject: [PATCH 33/34] Remove .h.bak file --- ggml/include/ggml-backend.h.bak | 355 -------------------------------- 1 file changed, 355 deletions(-) delete mode 100644 ggml/include/ggml-backend.h.bak diff --git a/ggml/include/ggml-backend.h.bak b/ggml/include/ggml-backend.h.bak deleted file mode 100644 index 65b7c14d64e..00000000000 --- a/ggml/include/ggml-backend.h.bak +++ /dev/null @@ -1,355 +0,0 @@ -#pragma once - -#include "ggml.h" -#include "ggml-alloc.h" - -#ifdef GGML_BACKEND_SHARED -# if defined(_WIN32) && !defined(__MINGW32__) -# ifdef GGML_BACKEND_BUILD -# define GGML_BACKEND_API __declspec(dllexport) extern -# else -# define GGML_BACKEND_API __declspec(dllimport) extern -# endif -# else -# define GGML_BACKEND_API __attribute__ ((visibility ("default"))) extern -# endif -#else -# define GGML_BACKEND_API extern -#endif - -#ifdef __cplusplus -extern "C" { -#endif - - typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t; - typedef struct ggml_backend_buffer * ggml_backend_buffer_t; - typedef struct ggml_backend_event * ggml_backend_event_t; - typedef struct ggml_backend * ggml_backend_t; - typedef void * ggml_backend_graph_plan_t; - typedef struct ggml_backend_reg * ggml_backend_reg_t; - typedef struct ggml_backend_device * ggml_backend_dev_t; - - - // - // Backend buffer type - // - - GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft); - GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size); - GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft); - GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft); - GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor); - GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft); - GGML_API ggml_backend_dev_t ggml_backend_buft_get_device (ggml_backend_buffer_type_t buft); - - // - // Backend buffer - // - - enum ggml_backend_buffer_usage { - GGML_BACKEND_BUFFER_USAGE_ANY = 0, - GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1, - GGML_BACKEND_BUFFER_USAGE_COMPUTE = 2, - }; - - GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer); - GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer); - GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer); - GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer); - GGML_API enum ggml_status ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); - GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer); - GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer); - GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); - GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value); - GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer); - GGML_API void 
ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage); - GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage (ggml_backend_buffer_t buffer); - GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer); - GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer); - - // tensor copy between different backends - GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst); - - // - // Backend (stream) - // - - GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend); - GGML_API const char * ggml_backend_name(ggml_backend_t backend); - GGML_API void ggml_backend_free(ggml_backend_t backend); - - GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend); - GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size); - GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend); - GGML_API size_t ggml_backend_get_max_size(ggml_backend_t backend); - - GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); - GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); - - // "offset" refers to the offset in tensor->data for setting/getting data - GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); - GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); - GGML_API void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size); - - GGML_API void ggml_backend_synchronize(ggml_backend_t backend); - - GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph); - GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan); - - GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan); - GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph); - GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph); - - // NOTE: will be removed, use device version instead - GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op); - GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft); - GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op); - - // asynchronous copy - // the copy is performed after all the currently queued operations in backend_src - // backend_dst will wait for the copy to complete before performing other operations - // automatic fallback to sync copy if async is not supported - GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst); - - GGML_API ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend); - - // - // Events - // - - GGML_API ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device); - GGML_API void ggml_backend_event_free(ggml_backend_event_t event); - GGML_API void ggml_backend_event_record(ggml_backend_event_t event, 
ggml_backend_t backend); - GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event); - GGML_API void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event); - - // - // Backend device - // - - enum ggml_backend_dev_type { - // CPU device using system memory - GGML_BACKEND_DEVICE_TYPE_CPU, - // GPU device using dedicated memory - GGML_BACKEND_DEVICE_TYPE_GPU, - // accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX) - GGML_BACKEND_DEVICE_TYPE_ACCEL - }; - - // functionality supported by the device - struct ggml_backend_dev_caps { - // asynchronous operations - bool async; - // pinned host buffer - bool host_buffer; - // creating buffers from host ptr - bool buffer_from_host_ptr; - // event synchronization - bool events; - }; - - // all the device properties - struct ggml_backend_dev_props { - const char * name; - const char * description; - size_t memory_free; - size_t memory_total; - enum ggml_backend_dev_type type; - struct ggml_backend_dev_caps caps; - }; - - GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device); - GGML_API const char * ggml_backend_dev_description(ggml_backend_dev_t device); - GGML_API void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total); - GGML_API enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device); - GGML_API void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props); - GGML_API ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device); - GGML_API ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params); - GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device); - GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device); - GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size); - - GGML_API bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op); - GGML_API bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft); - GGML_API bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op); - - // - // Backend (reg) - // - - GGML_API const char * ggml_backend_reg_name(ggml_backend_reg_t reg); - GGML_API size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg); - GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index); - GGML_API void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name); - - // Common functions that may be obtained using ggml_backend_reg_get_proc_address - - // Split buffer type for tensor parallelism - typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(int main_device, const float * tensor_split); - // Set the number of threads for the backend - typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads); - // Get additional buffer types provided by the device (returns a NULL-terminated array) - typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device); - // Set the abort callback for the backend - typedef void (*ggml_backend_set_abort_callback_t)(ggml_backend_t backend, ggml_abort_callback abort_callback, void * abort_callback_data); - // Get a list of feature flags supported by the backend (returns a 
NULL-terminated array) - struct ggml_backend_feature { - const char * name; - const char * value; - }; - typedef struct ggml_backend_feature * (*ggml_backend_get_features_t)(ggml_backend_reg_t reg); - - // - // Backend registry - // - - GGML_API void ggml_backend_device_register(ggml_backend_dev_t device); - - // Backend (reg) enumeration - GGML_API size_t ggml_backend_reg_count(void); - GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index); - GGML_API ggml_backend_reg_t ggml_backend_reg_by_name(const char * name); - - // Device enumeration - GGML_API size_t ggml_backend_dev_count(void); - GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index); - GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name); - GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type); - - // Direct backend (stream) initialization - // = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params) - GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params); - // = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params) - GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params); - // = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL) - GGML_API ggml_backend_t ggml_backend_init_best(void); - - // Load a backend from a dynamic library and register it - GGML_API ggml_backend_reg_t ggml_backend_load(const char * path); - // Unload a backend if loaded dynamically and unregister it - GGML_API void ggml_backend_unload(ggml_backend_reg_t reg); - // Load all known backends from dynamic libraries - GGML_API void ggml_backend_load_all(void); - GGML_API void ggml_backend_load_all_from_path(const char * dir_path); - - // - // Backend scheduler - // - - // The backend scheduler allows for multiple backend devices to be used together - // Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends - // The backends are selected based on: - // - the backend that supports the operation - // - the location of the pre-allocated tensors (e.g. 
the weights) - /* - Example usage: - - // operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned - // preferrably to run on the same backend as the buffer - ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS); - - sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false); - - // initialize buffers from a max size graph (optional) - reserve_graph = build_graph(sched, max_batch_size); - - // manually assign nodes to a backend (optional, should not be needed in most cases) - struct ggml_tensor * node = ggml_mul_mat(ctx, ...); - ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu); - - ggml_backend_sched_reserve(sched, reserve_graph); - - // compute - graph = build_graph(sched); // the graph and its tensors are single-use in terms of allocation, multi-use in terms of computation - for (int i = 0; i < 10; ++i) { - ggml_backend_sched_graph_compute(sched, graph); // on the first iteration the graph is allocated automatically - } - - // if there are graph inputs: - graph = build_graph(sched); // get a new graph that is not allocated (the metadata for the old graph is freed once ggml_free is called) - ggml_backend_sched_reset(sched); // clear the allocation of the previous graph - ggml_backend_sched_alloc_graph(sched, graph); // explicitly allocate the new graph but do not execute it - ggml_backend_tensor_set(input_tensor, ...); // copy data to the newly allocated graph tensors - ggml_backend_sched_graph_compute(sched, graph); // execute the graph - - // as an alternative to the above it is also possible to assign the inputs to a dedicated context and - // allocate them statically via ggml_backend_alloc_ctx_tensors - } - */ - - typedef struct ggml_backend_sched * ggml_backend_sched_t; - - // Evaluation callback for each node in the graph (set with ggml_backend_sched_set_eval_callback) - // when ask == true, the scheduler wants to know if the user wants to observe this node - // this allows the scheduler to batch nodes together in order to evaluate them in a single call - // - // when ask == false, the scheduler is passing the node tensor to the user for observation - // if the user returns false, the scheduler will cancel the graph compute - // - typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data); - - // Initialize a backend scheduler, backends with low index are given priority over backends with high index - GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel); - GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched); - - // Initialize backend buffers from a measure graph - GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); // returns success - - GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched); - GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i); - - // Get the number of splits of the last graph - GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched); - GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched); - - GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend); - - GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t 
backend); - GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node); - - // Allocate and compute graph on the backend scheduler - GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); // returns success - GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph); - GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph); - GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched); - - // Reset all assignments and allocators - must be called before changing the node backends or allocating a new graph. - // This in effect deallocates all tensors that were previously allocated and leaves them with dangling pointers. - // The correct way to use this API is to discard the deallocated tensors and create new ones. - GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched); - - // Set a callback to be called for each resulting node during graph compute - GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data); - - // - // Utils - // - - struct ggml_backend_graph_copy { - ggml_backend_buffer_t buffer; - struct ggml_context * ctx_allocated; - struct ggml_context * ctx_unallocated; - struct ggml_cgraph * graph; - }; - - // Copy a graph to a different backend - GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph); - GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy); - - typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data); - - // Compare the output of two backends - GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data); - - // Tensor initialization - GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr); - GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor); - - // CPU buffer types are always available - GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size); - GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void); - - -#ifdef __cplusplus -} -#endif From c838670abc368a51f1a1640b9ede527de30c91fa Mon Sep 17 00:00:00 2001 From: peardox Date: Thu, 8 May 2025 03:12:28 +0100 Subject: [PATCH 34/34] re-introduce broken code for non WHISPER_BINDINGS_FLAT --- examples/server/server.cpp | 1 + examples/wchess/wchess.cmd/wchess.cmd.cpp | 1 + src/whisper.cpp | 2 ++ 3 files changed, 4 insertions(+) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 2b49512a49b..0a0318d1e10 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -4,6 +4,7 @@ #include "whisper.h" #ifdef WHISPER_BINDINGS_FLAT #include "whisper-flat.h" +#include "../ggml/src/ggml-flat.h" #endif #include "httplib.h" #include "json.hpp" diff --git a/examples/wchess/wchess.cmd/wchess.cmd.cpp b/examples/wchess/wchess.cmd/wchess.cmd.cpp index 015cc77bd9e..9b4bc425a64 100644 --- a/examples/wchess/wchess.cmd/wchess.cmd.cpp +++ b/examples/wchess/wchess.cmd/wchess.cmd.cpp @@ -9,6 +9,7 @@ #include "common-sdl.h" #ifdef WHISPER_BINDINGS_FLAT #include 
"whisper-flat.h" +#include "../ggml/src/ggml-flat.h" #endif #include diff --git a/src/whisper.cpp b/src/whisper.cpp index ba009c3f35f..30a4bedfedf 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -206,10 +206,12 @@ static bool ggml_graph_compute_helper( #ifndef WHISPER_BINDINGS_FLAT static void whisper_load_backends() { +#ifdef GGML_BACKEND_DL static std::once_flag flag; std::call_once(flag, []() { ggml_backend_load_all(); }); +#endif } #endif