From c35289369ff5bbe80f9f0eac0ac6ab3202e0fd92 Mon Sep 17 00:00:00 2001 From: "Metcalfe, Ryan" Date: Wed, 21 Jun 2023 16:21:02 -0400 Subject: [PATCH 1/9] openvino: use OpenVINO encoder inference --- CMakeLists.txt | 16 ++++ examples/main/main.cpp | 7 ++ openvino/whisper-openvino-encoder.cpp | 108 +++++++++++++++++++++++ openvino/whisper-openvino-encoder.h | 31 +++++++ whisper.cpp | 118 +++++++++++++++++++++++++- whisper.h | 17 ++++ 6 files changed, 294 insertions(+), 3 deletions(-) create mode 100644 openvino/whisper-openvino-encoder.cpp create mode 100644 openvino/whisper-openvino-encoder.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 60eb027641e..e844e5af31c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,6 +54,8 @@ option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF) option(WHISPER_NO_FMA "whisper: disable FMA" OFF) option(WHISPER_NO_F16C "whisper: disable F16c" OFF) +option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF) + if (APPLE) option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF) option(WHISPER_COREML "whisper: enable Core ML framework" OFF) @@ -192,6 +194,10 @@ if (WHISPER_CLBLAST) endif() endif() +if( WHISPER_OPENVINO ) + find_package(OpenVINO REQUIRED COMPONENTS Runtime) +endif() + # compiler flags if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) @@ -297,6 +303,11 @@ if (WHISPER_COREML) ) endif() +if (WHISPER_OPENVINO) + set( OpenVINO_SOURCES openvino/whisper-openvino-encoder.h openvino/whisper-openvino-encoder.cpp ) + set( WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_OPENVINO) +endif() + # # whisper - this is the main library of the project # @@ -310,6 +321,7 @@ add_library(${TARGET} ${GGML_OPENCL_SOURCES} whisper.h whisper.cpp + ${OpenVINO_SOURCES} ) include(DefaultTargetOptions) @@ -322,6 +334,10 @@ if (WHISPER_COREML) target_link_libraries(${TARGET} PRIVATE whisper.coreml) endif() +if (WHISPER_OPENVINO) + target_link_libraries(${TARGET} PRIVATE openvino::runtime) +endif() + if (MSVC) 
target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT}) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index e659b7e59e6..94b44ccb975 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -87,6 +87,8 @@ struct whisper_params { std::string font_path = "/System/Library/Fonts/Supplemental/Courier New Bold.ttf"; std::string model = "models/ggml-base.en.bin"; + std::string openvino_encode_device = "CPU"; + std::vector<std::string> fname_inp = {}; std::vector<std::string> fname_out = {}; }; @@ -146,6 +148,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) { else if ( arg == "--prompt") { params.prompt = argv[++i]; } else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; } else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(argv[++i]); } + else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; } else { fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); whisper_print_usage(argc, argv, params); @@ -197,6 +200,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para fprintf(stderr, " --prompt PROMPT [%-7s] initial prompt\n", params.prompt.c_str()); fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str()); fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", ""); + fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] The OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str()); fprintf(stderr, "\n"); } @@ -713,6 +717,9 @@ int main(int argc, char ** argv) { return 3; } + // initialize openvino encoder. This has no effect on whisper.cpp builds that don't have OpenVINO configured. 
+ whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr); + for (int f = 0; f < (int) params.fname_inp.size(); ++f) { const auto fname_inp = params.fname_inp[f]; const auto fname_out = f < (int) params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f]; diff --git a/openvino/whisper-openvino-encoder.cpp b/openvino/whisper-openvino-encoder.cpp new file mode 100644 index 00000000000..11aef39dd43 --- /dev/null +++ b/openvino/whisper-openvino-encoder.cpp @@ -0,0 +1,108 @@ +#include "openvino/whisper-openvino-encoder.h" +#include "ggml.h" +#include <openvino/openvino.hpp> +#include <iostream> + +struct whisper_openvino_context { + ov::InferRequest inferRequest; +}; + +struct whisper_openvino_context * whisper_openvino_init(const char* path_model, + const char* device, + const char* cache_dir) +{ + if (!path_model || !device) { + fprintf(stderr, "%s: path_model and/or device is null\n", __func__); + return nullptr; + } + + fprintf(stderr, "%s: path_model = %s, device = %s, cache_dir = %s\n", + __func__, path_model, device, cache_dir ? cache_dir : "(not set)"); + + whisper_openvino_context *context = new whisper_openvino_context; + try { + ov::Core core; + + if (cache_dir) { + // enables caching of device-specific 'blobs' during core.compile_model + // routine. This speeds up calls to compile_model for successive runs. + core.set_property(ov::cache_dir(cache_dir)); + } + + //Read the OpenVINO encoder IR (.xml/.bin) from disk, producing an ov::Model object. + std::shared_ptr<ov::Model> model = core.read_model(path_model); + + // Produce a compiled-model object, given the device ("CPU", "GPU", etc.) + auto compiledModel = core.compile_model(model, device); + + // From the compiled model object, create an infer request. This is the thing that we + // will use later on to trigger inference execution. 
+ context->inferRequest = compiledModel.create_infer_request(); + } + catch (const std::exception& error) { + std::cout << "in openvino encoder compile routine: exception: " << error.what() << std::endl; + delete context; + context = nullptr; + } + + return context; +} + +void whisper_openvino_free(struct whisper_openvino_context * ctx) { + if( ctx ) { + delete ctx; + } +} + +int whisper_openvino_encode( + whisper_openvino_context* ctx, + ggml_tensor* mel, + ggml_tensor* out) { + + if (!ctx || !mel || !out) { + fprintf(stderr, "%s: Error! ctx / mel / out is null\n", __func__); + return 0; + } + + if (mel->n_dims != 2) { + fprintf(stderr, "%s: Error! mel ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n", + __func__, mel->n_dims); + return 0; + } + + if (out->n_dims != 2) { + fprintf(stderr, "%s: Error! out ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n", + __func__, out->n_dims); + return 0; + } + + try { + + //wrap the passed-in mel ggml_tensor as an OpenVINO Tensor object, and set as input tensor to infer request + { + // note, we populate shape & stride dimensions in opposite order from how they are listed in ne / nb arrays + ov::Shape input_shape = { 1, (unsigned long long)mel->ne[1], (unsigned long long)mel->ne[0] }; + ov::Strides input_strides = { mel->nb[2], mel->nb[1], mel->nb[0] }; + ov::Tensor input_tensor(ov::element::f32, input_shape, mel->data, input_strides); + ctx->inferRequest.set_input_tensor(input_tensor); + } + + //wrap the passed-in out ggml_tensor as an OpenVINO Tensor object, and set as output tensor to infer request + { + // note, we populate shape & stride dimensions in opposite order from how they are listed in ne / nb arrays + ov::Shape output_shape = { 1, (unsigned long long)out->ne[1], (unsigned long long)out->ne[0] }; + ov::Strides output_strides = { out->nb[2], out->nb[1], out->nb[0] }; + ov::Tensor out_tensor(ov::element::f32, output_shape, out->data, output_strides); + 
ctx->inferRequest.set_output_tensor(out_tensor); + } + + //run inference + ctx->inferRequest.infer(); + } + catch (const std::exception& error) { + std::cout << "in openvino encode inference execution routine: exception: " << error.what() << std::endl; + return 0; + } + + return 1; +} \ No newline at end of file diff --git a/openvino/whisper-openvino-encoder.h b/openvino/whisper-openvino-encoder.h new file mode 100644 index 00000000000..7c2f6dfc2e0 --- /dev/null +++ b/openvino/whisper-openvino-encoder.h @@ -0,0 +1,31 @@ +// Wrapper of the OpenVINO Whisper Encoder model +// + +#if __cplusplus +extern "C" { +#endif + +struct whisper_openvino_context; + +// initialize openvino encoder, given path to model xml, device ("CPU", "GPU", etc.), and +// path to cache_dir. Returns null upon failure. +struct whisper_openvino_context * whisper_openvino_init(const char * path_model, + const char * device, + const char * cache_dir); + +// clean up a ctx previously returned from whisper_openvino_init() +void whisper_openvino_free(struct whisper_openvino_context * ctx); + +struct ggml_tensor; + +// Perform encode using OpenVINO. 
+// Returns 1 on success +// Returns 0 on failure +int whisper_openvino_encode( + whisper_openvino_context* ctx, + ggml_tensor* mel, + ggml_tensor* out); + +#if __cplusplus +} +#endif diff --git a/whisper.cpp b/whisper.cpp index 0cdd4a1d49f..f1e7e17dd79 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -3,6 +3,10 @@ #include "coreml/whisper-encoder.h" #endif +#if WHISPER_USE_OPENVINO +#include "openvino/whisper-openvino-encoder.h" +#endif + #include "ggml.h" #include @@ -652,6 +656,10 @@ struct whisper_state { whisper_coreml_context * ctx_coreml = nullptr; #endif +#ifdef WHISPER_USE_OPENVINO + whisper_openvino_context* ctx_openvino = nullptr; +#endif + // [EXPERIMENTAL] token-level timestamps data int64_t t_beg = 0; int64_t t_last = 0; @@ -1463,7 +1471,13 @@ static bool whisper_encode_internal( const bool use_coreml = wstate.ctx_coreml != nullptr; #endif - if (!use_coreml) { +#ifndef WHISPER_USE_OPENVINO + const bool use_openvino = false; +#else + const bool use_openvino = wstate.ctx_openvino != nullptr; +#endif + + if (!use_coreml && !use_openvino) { // convolution + gelu { wstate.use_buf(ctx0, 1); @@ -1762,8 +1776,7 @@ static bool whisper_encode_internal( } } #ifdef WHISPER_USE_COREML - else - { + else if(use_coreml) { wstate.use_buf(ctx0, -1); cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx); @@ -1771,6 +1784,16 @@ static bool whisper_encode_internal( whisper_coreml_encode(wstate.ctx_coreml, (float *) mel->data, (float *) cur->data); } #endif +#ifdef WHISPER_USE_OPENVINO + else if(use_openvino) { + wstate.use_buf(ctx0, -1); + + cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx); + + if (!whisper_openvino_encode(wstate.ctx_openvino, mel, cur)) + return false; + } +#endif // cur //{ @@ -2613,6 +2636,31 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) { } #endif +#ifdef WHISPER_USE_OPENVINO +// replace .bin with-encoder-openvino.xml +static std::string whisper_get_openvino_path_encoder(std::string path_bin) { + auto 
pos = path_bin.rfind('.'); + if (pos != std::string::npos) { + path_bin = path_bin.substr(0, pos); + } + + path_bin += "-encoder-openvino.xml"; + + return path_bin; +} + +static std::string whisper_get_openvino_path_cache(std::string path_bin) { + auto pos = path_bin.rfind('.'); + if (pos != std::string::npos) { + path_bin = path_bin.substr(0, pos); + } + + path_bin += "-encoder-openvino-cache"; + + return path_bin; +} +#endif + struct whisper_state * whisper_init_state(whisper_context * ctx) { whisper_state * state = new whisper_state; @@ -2679,6 +2727,54 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) { return state; } +int whisper_ctx_init_openvino_encoder(struct whisper_context* ctx, + const char* openvino_model_path, + const char* openvino_device, + const char* openvino_cache_dir) +{ +#ifndef WHISPER_USE_OPENVINO + return 0; +#else + if (!openvino_model_path && ctx->path_model.empty()) + { + fprintf(stderr, "%s: openvino_model_path is nullptr, and ctx has no model_path set.\n", __func__); + return 0; + } + + std::string path_openvino; + if (!openvino_model_path) { + //if openvino_model_path is not set, attempt to find it in the same directory as ggml-.bin model + path_openvino = whisper_get_openvino_path_encoder(ctx->path_model); + } + else { + path_openvino = openvino_model_path; + } + + std::string path_openvino_cache_dir; + if (!openvino_cache_dir) { + //if openvino_cache_dir is not set, set it as a dir residing next to ggml-.bin + path_openvino_cache_dir = whisper_get_openvino_path_cache(ctx->path_model); + } + else { + path_openvino_cache_dir = openvino_cache_dir; + } + + fprintf(stderr, "%s: loading OpenVINO model from '%s'\n", __func__, path_openvino.c_str()); + fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__); + + ctx->state->ctx_openvino = whisper_openvino_init(path_openvino.c_str(), openvino_device, path_openvino_cache_dir.c_str()); + if (!ctx->state->ctx_openvino) { + fprintf(stderr, "%s: failed to 
init OpenVINO encoder from '%s'\n", __func__, path_openvino.c_str()); + return 0; + } + else { + fprintf(stderr, "%s: OpenVINO model loaded\n", __func__); + } + + return 1; +#endif +} + struct whisper_context * whisper_init_from_file_no_state(const char * path_model) { fprintf(stderr, "%s: loading model from '%s'\n", __func__, path_model); @@ -2833,6 +2929,13 @@ void whisper_free_state(struct whisper_state * state) } #endif +#ifdef WHISPER_USE_OPENVINO + if (state->ctx_openvino != nullptr) { + whisper_openvino_free(state->ctx_openvino); + state->ctx_openvino = nullptr; + } +#endif + delete state; } } @@ -3268,6 +3371,14 @@ static int whisper_has_coreml(void) { #endif } +static int whisper_has_openvino(void) { +#ifdef WHISPER_USE_OPENVINO + return 1; +#else + return 0; +#endif +} + const char * whisper_print_system_info(void) { static std::string s; @@ -3285,6 +3396,7 @@ const char * whisper_print_system_info(void) { s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | "; s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | "; s += "COREML = " + std::to_string(whisper_has_coreml()) + " | "; + s += "OPENVINO = " + std::to_string(whisper_has_openvino()) + " | "; return s.c_str(); } diff --git a/whisper.h b/whisper.h index e983c7d4fa3..7d70117d712 100644 --- a/whisper.h +++ b/whisper.h @@ -110,6 +110,23 @@ extern "C" { WHISPER_API struct whisper_state * whisper_init_state(struct whisper_context * ctx); + // Given a context, enable use of OpenVINO for encode inference. + // openvino_model_path: Optional path to OpenVINO encoder IR model. If set to nullptr, + // the path will be generated from the ggml model path that was passed + // in to whisper_init_from_file. For example, if 'path_model' was + // "/path/to/ggml-base.en.bin", then OpenVINO IR model path will be + // assumed to be "/path/to/ggml-base.en-encoder-openvino.xml". + // openvino_device: OpenVINO device to run inference on ("CPU", "GPU", etc.) 
+ // openvino_cache_dir: Optional cache directory that can speed up init time, especially for + // GPU, by caching compiled 'blobs' there. + // Set to nullptr if not used. + // Returns 1 on success. If OpenVINO is not enabled in build, this + // simply returns 0. + WHISPER_API int whisper_ctx_init_openvino_encoder(struct whisper_context* ctx, + const char* openvino_model_path, + const char* openvino_device, + const char* openvino_cache_dir); + // Frees all allocated memory WHISPER_API void whisper_free (struct whisper_context * ctx); WHISPER_API void whisper_free_state(struct whisper_state * state); From 93b8be46830214a3b7348fd9fdd81ad0c018fca8 Mon Sep 17 00:00:00 2001 From: "Metcalfe, Ryan" Date: Wed, 21 Jun 2023 16:21:55 -0400 Subject: [PATCH 2/9] openvino: add python script for OpenVINO model generation --- models/convert-whisper-to-openvino.py | 53 +++++++++++++++++++++ models/openvino-conversion-requirements.txt | 2 + 2 files changed, 55 insertions(+) create mode 100644 models/convert-whisper-to-openvino.py create mode 100644 models/openvino-conversion-requirements.txt diff --git a/models/convert-whisper-to-openvino.py b/models/convert-whisper-to-openvino.py new file mode 100644 index 00000000000..31cf29abdf5 --- /dev/null +++ b/models/convert-whisper-to-openvino.py @@ -0,0 +1,53 @@ +import argparse +import torch +from whisper import load_model +import os +from openvino.tools import mo +from openvino.runtime import serialize +import shutil + +def convert_encoder(hparams, encoder, mname): + encoder.eval() + + mel = torch.zeros((1, 80, 3000)) + + onnx_folder=os.path.join(os.path.dirname(__file__),"onnx_encoder") + + #create a directory to store the onnx model, and other collateral that is saved during onnx export procedure + if not os.path.isdir(onnx_folder): + os.makedirs(onnx_folder) + + onnx_path = os.path.join(onnx_folder, "whisper_encoder.onnx") + + torch.onnx.export( + encoder, + mel, + onnx_path, + input_names=["mel"], + output_names=["output_features"] + 
) + + # use model optimizer to convert onnx to OpenVINO IR format + encoder_model = mo.convert_model(onnx_path, compress_to_fp16=True) + serialize(encoder_model, xml_path='ggml-' + mname + '-encoder-openvino.xml') + + #cleanup + if os.path.isdir(onnx_folder): + shutil.rmtree(onnx_folder) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1)", required=True) + args = parser.parse_args() + + if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1"]: + raise ValueError("Invalid model name") + + whisper = load_model(args.model).cpu() + hparams = whisper.dims + + encoder = whisper.encoder + + # Convert encoder to onnx + convert_encoder(hparams, encoder, args.model) diff --git a/models/openvino-conversion-requirements.txt b/models/openvino-conversion-requirements.txt new file mode 100644 index 00000000000..5bfd95db88e --- /dev/null +++ b/models/openvino-conversion-requirements.txt @@ -0,0 +1,2 @@ +openvino-dev[pytorch,onnx] +openai-whisper \ No newline at end of file From 58eae32d14d1a385df78e1cdb9d542b1887cdd08 Mon Sep 17 00:00:00 2001 From: Ryan Metcalfe Date: Thu, 22 Jun 2023 14:38:43 -0400 Subject: [PATCH 3/9] whisper: Fix 'unused' warnings when OpenVINO isn't enabled in build --- whisper.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/whisper.cpp b/whisper.cpp index f1e7e17dd79..05ea677869d 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -2733,6 +2733,10 @@ int whisper_ctx_init_openvino_encoder(struct whisper_context* ctx, const char* openvino_cache_dir) { #ifndef WHISPER_USE_OPENVINO + (void)(ctx); + (void)(openvino_model_path); + (void)(openvino_device); + (void)(openvino_cache_dir); return 0; #else if (!openvino_model_path && ctx->path_model.empty()) From 4bc1ebcdc5cc738b1a7fff8c8ef7738a9283edd4 Mon Sep 17 00:00:00 2001 
From: Ryan Metcalfe <107415876+RyanMetcalfeInt8@users.noreply.github.com> Date: Wed, 28 Jun 2023 15:18:30 -0400 Subject: [PATCH 4/9] Apply suggestions from code review Co-authored-by: Georgi Gerganov --- whisper.cpp | 6 +++--- whisper.h | 15 ++++++++------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/whisper.cpp b/whisper.cpp index 05ea677869d..291d9092dd0 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -1785,7 +1785,7 @@ static bool whisper_encode_internal( } #endif #ifdef WHISPER_USE_OPENVINO - else if(use_openvino) { + else if (use_openvino) { wstate.use_buf(ctx0, -1); cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx); @@ -2638,7 +2638,7 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) { #ifdef WHISPER_USE_OPENVINO // replace .bin with-encoder-openvino.xml -static std::string whisper_get_openvino_path_encoder(std::string path_bin) { +static std::string whisper_get_openvino_path_encoder(const std::string & path_bin) { auto pos = path_bin.rfind('.'); if (pos != std::string::npos) { path_bin = path_bin.substr(0, pos); @@ -2649,7 +2649,7 @@ static std::string whisper_get_openvino_path_encoder(std::string path_bin) { return path_bin; } -static std::string whisper_get_openvino_path_cache(std::string path_bin) { +static std::string whisper_get_openvino_path_cache(const std::string & path_bin) { auto pos = path_bin.rfind('.'); if (pos != std::string::npos) { path_bin = path_bin.substr(0, pos); diff --git a/whisper.h b/whisper.h index 7d70117d712..021dbe388ca 100644 --- a/whisper.h +++ b/whisper.h @@ -111,21 +111,22 @@ extern "C" { WHISPER_API struct whisper_state * whisper_init_state(struct whisper_context * ctx); // Given a context, enable use of OpenVINO for encode inference. - // openvino_model_path: Optional path to OpenVINO encoder IR model. If set to nullptr, + // model_path: Optional path to OpenVINO encoder IR model. 
If set to nullptr, // the path will be generated from the ggml model path that was passed // in to whisper_init_from_file. For example, if 'path_model' was // "/path/to/ggml-base.en.bin", then OpenVINO IR model path will be // assumed to be "/path/to/ggml-base.en-encoder-openvino.xml". - // openvino_device: OpenVINO device to run inference on ("CPU", "GPU", etc.) - // openvino_cache_dir: Optional cache directory that can speed up init time, especially for + // device: OpenVINO device to run inference on ("CPU", "GPU", etc.) + // cache_dir: Optional cache directory that can speed up init time, especially for // GPU, by caching compiled 'blobs' there. // Set to nullptr if not used. // Returns 1 on success. If OpenVINO is not enabled in build, this // simply returns 0. - WHISPER_API int whisper_ctx_init_openvino_encoder(struct whisper_context* ctx, - const char* openvino_model_path, - const char* openvino_device, - const char* openvino_cache_dir); + WHISPER_API int whisper_ctx_init_openvino_encoder( + struct whisper_context * ctx, + const char * model_path, + const char * device, + const char * cache_dir); // Frees all allocated memory WHISPER_API void whisper_free (struct whisper_context * ctx); From 6bfa37112ff82295542ec2bf23f9dfafaae972fd Mon Sep 17 00:00:00 2001 From: "Metcalfe, Ryan" Date: Wed, 28 Jun 2023 15:56:47 -0400 Subject: [PATCH 5/9] whisper: Fix compilation error --- whisper.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/whisper.cpp b/whisper.cpp index 291d9092dd0..6fcc598fdad 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -2639,25 +2639,27 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) { #ifdef WHISPER_USE_OPENVINO // replace .bin with-encoder-openvino.xml static std::string whisper_get_openvino_path_encoder(const std::string & path_bin) { + std::string openvino_path_encoder; auto pos = path_bin.rfind('.'); if (pos != std::string::npos) { - path_bin = path_bin.substr(0, pos); + 
openvino_path_encoder = path_bin.substr(0, pos); } - path_bin += "-encoder-openvino.xml"; + openvino_path_encoder += "-encoder-openvino.xml"; - return path_bin; + return openvino_path_encoder; } static std::string whisper_get_openvino_path_cache(const std::string & path_bin) { + std::string openvino_path_cache; auto pos = path_bin.rfind('.'); if (pos != std::string::npos) { - path_bin = path_bin.substr(0, pos); + openvino_path_cache = path_bin.substr(0, pos); } - path_bin += "-encoder-openvino-cache"; + openvino_path_cache += "-encoder-openvino-cache"; - return path_bin; + return openvino_path_cache; } #endif From df77368fb54d99678342af43e708ef7cf4af7e85 Mon Sep 17 00:00:00 2001 From: "Metcalfe, Ryan" Date: Wed, 28 Jun 2023 16:13:09 -0400 Subject: [PATCH 6/9] whisper: revert whisper_get_openvino_path_encoder & whisper_get_openvino_path_cache to non-const func signatures --- whisper.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/whisper.cpp b/whisper.cpp index 6fcc598fdad..d7940a1b087 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -2638,28 +2638,26 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) { #ifdef WHISPER_USE_OPENVINO // replace .bin with-encoder-openvino.xml -static std::string whisper_get_openvino_path_encoder(const std::string & path_bin) { - std::string openvino_path_encoder; +static std::string whisper_get_openvino_path_encoder(std::string path_bin) { auto pos = path_bin.rfind('.'); if (pos != std::string::npos) { - openvino_path_encoder = path_bin.substr(0, pos); + path_bin = path_bin.substr(0, pos); } - openvino_path_encoder += "-encoder-openvino.xml"; + path_bin += "-encoder-openvino.xml"; - return openvino_path_encoder; + return path_bin; } -static std::string whisper_get_openvino_path_cache(const std::string & path_bin) { - std::string openvino_path_cache; +static std::string whisper_get_openvino_path_cache(std::string path_bin) { auto pos = path_bin.rfind('.'); if (pos != 
std::string::npos) { - openvino_path_cache = path_bin.substr(0, pos); + path_bin = path_bin.substr(0, pos); } - openvino_path_cache += "-encoder-openvino-cache"; + path_bin += "-encoder-openvino-cache"; - return openvino_path_cache; + return path_bin; } #endif From 76c41863933432b5f378c938cb27bacdc6b43912 Mon Sep 17 00:00:00 2001 From: "Metcalfe, Ryan" Date: Wed, 28 Jun 2023 16:52:22 -0400 Subject: [PATCH 7/9] cmake: Add openvino-encoder as separate object target --- CMakeLists.txt | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e844e5af31c..4115e4d31e2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -304,8 +304,21 @@ if (WHISPER_COREML) endif() if (WHISPER_OPENVINO) - set( OpenVINO_SOURCES openvino/whisper-openvino-encoder.h openvino/whisper-openvino-encoder.cpp ) + set(TARGET whisper.openvino) + + add_library(${TARGET} OBJECT + openvino/whisper-openvino-encoder.h + openvino/whisper-openvino-encoder.cpp + ) + + target_include_directories(${TARGET} PUBLIC + . 
+ ) + + set_property(TARGET ${TARGET} PROPERTY POSITION_INDEPENDENT_CODE ON) set( WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_OPENVINO) + + target_link_libraries(${TARGET} PRIVATE openvino::runtime) endif() # @@ -321,7 +334,6 @@ add_library(${TARGET} ${GGML_OPENCL_SOURCES} whisper.h whisper.cpp - ${OpenVINO_SOURCES} ) include(DefaultTargetOptions) @@ -335,7 +347,7 @@ if (WHISPER_COREML) endif() if (WHISPER_OPENVINO) - target_link_libraries(${TARGET} PRIVATE openvino::runtime) + target_link_libraries(${TARGET} PRIVATE whisper.openvino) endif() if (MSVC) From bc5746e877474ca25a07c1e2c443ac8a77699365 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 4 Jul 2023 15:37:38 +0300 Subject: [PATCH 8/9] whisper : minor style fixes --- whisper.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/whisper.cpp b/whisper.cpp index d7940a1b087..c4359a3ffef 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -657,7 +657,7 @@ struct whisper_state { #endif #ifdef WHISPER_USE_OPENVINO - whisper_openvino_context* ctx_openvino = nullptr; + whisper_openvino_context * ctx_openvino = nullptr; #endif // [EXPERIMENTAL] token-level timestamps data @@ -1776,7 +1776,7 @@ static bool whisper_encode_internal( } } #ifdef WHISPER_USE_COREML - else if(use_coreml) { + else if (use_coreml) { wstate.use_buf(ctx0, -1); cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx); @@ -1790,8 +1790,9 @@ static bool whisper_encode_internal( cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx); - if (!whisper_openvino_encode(wstate.ctx_openvino, mel, cur)) + if (!whisper_openvino_encode(wstate.ctx_openvino, mel, cur)) { return false; + } } #endif From df982879fcdb1988e6f6b746fa6f61c16163ff95 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 4 Jul 2023 15:41:22 +0300 Subject: [PATCH 9/9] minor : indentation fixes --- CMakeLists.txt | 2 +- examples/main/main.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt 
b/CMakeLists.txt index c1bc35f611d..88021e01470 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -316,7 +316,7 @@ if (WHISPER_OPENVINO) ) set_property(TARGET ${TARGET} PROPERTY POSITION_INDEPENDENT_CODE ON) - set( WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_OPENVINO) + set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_OPENVINO) target_link_libraries(${TARGET} PRIVATE openvino::runtime) endif() diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 5dcd09b05ac..9a68367186d 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -157,7 +157,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) { else if ( arg == "--prompt") { params.prompt = argv[++i]; } else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; } else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(argv[++i]); } - else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; } + else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; } else { fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); whisper_print_usage(argc, argv, params); @@ -210,7 +210,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para fprintf(stderr, " --prompt PROMPT [%-7s] initial prompt\n", params.prompt.c_str()); fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str()); fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", ""); - fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] The OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str()); + fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str()); fprintf(stderr, "\n"); }