From c35289369ff5bbe80f9f0eac0ac6ab3202e0fd92 Mon Sep 17 00:00:00 2001
From: "Metcalfe, Ryan" <ryan.metcalfe@intel.com>
Date: Wed, 21 Jun 2023 16:21:02 -0400
Subject: [PATCH 1/9] openvino: use OpenVINO encoder inference

---
 CMakeLists.txt                        |  16 ++++
 examples/main/main.cpp                |   7 ++
 openvino/whisper-openvino-encoder.cpp | 108 +++++++++++++++++++++++
 openvino/whisper-openvino-encoder.h   |  31 +++++++
 whisper.cpp                           | 118 +++++++++++++++++++++++++-
 whisper.h                             |  17 ++++
 6 files changed, 294 insertions(+), 3 deletions(-)
 create mode 100644 openvino/whisper-openvino-encoder.cpp
 create mode 100644 openvino/whisper-openvino-encoder.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 60eb027641e..e844e5af31c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -54,6 +54,8 @@ option(WHISPER_NO_AVX2                "whisper: disable AVX2" OFF)
 option(WHISPER_NO_FMA                 "whisper: disable FMA"  OFF)
 option(WHISPER_NO_F16C                "whisper: disable F16c" OFF)
 
+option(WHISPER_OPENVINO               "whisper: support for OpenVINO" OFF)
+
 if (APPLE)
     option(WHISPER_NO_ACCELERATE         "whisper: disable Accelerate framework" OFF)
     option(WHISPER_COREML                "whisper: enable Core ML framework"     OFF)
@@ -192,6 +194,10 @@ if (WHISPER_CLBLAST)
     endif()
 endif()
 
+if( WHISPER_OPENVINO )
+    find_package(OpenVINO REQUIRED COMPONENTS Runtime)
+endif()
+
 # compiler flags
 
 if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
@@ -297,6 +303,11 @@ if (WHISPER_COREML)
         )
 endif()
 
+if (WHISPER_OPENVINO)
+    set( OpenVINO_SOURCES openvino/whisper-openvino-encoder.h openvino/whisper-openvino-encoder.cpp )
+    set( WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_OPENVINO)
+endif()
+
 #
 # whisper - this is the main library of the project
 #
@@ -310,6 +321,7 @@ add_library(${TARGET}
     ${GGML_OPENCL_SOURCES}
     whisper.h
     whisper.cpp
+    ${OpenVINO_SOURCES}
     )
 
 include(DefaultTargetOptions)
@@ -322,6 +334,10 @@ if (WHISPER_COREML)
     target_link_libraries(${TARGET} PRIVATE whisper.coreml)
 endif()
 
+if (WHISPER_OPENVINO)
+    target_link_libraries(${TARGET} PRIVATE openvino::runtime)
+endif()
+
 if (MSVC)
     target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
 
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index e659b7e59e6..94b44ccb975 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -87,6 +87,8 @@ struct whisper_params {
     std::string font_path = "/System/Library/Fonts/Supplemental/Courier New Bold.ttf";
     std::string model    = "models/ggml-base.en.bin";
 
+    std::string openvino_encode_device = "CPU";
+
     std::vector<std::string> fname_inp = {};
     std::vector<std::string> fname_out = {};
 };
@@ -146,6 +148,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
         else if (                  arg == "--prompt")         { params.prompt         = argv[++i]; }
         else if (arg == "-m"    || arg == "--model")          { params.model          = argv[++i]; }
         else if (arg == "-f"    || arg == "--file")           { params.fname_inp.emplace_back(argv[++i]); }
+        else if (arg == "-oved" || arg == "--ov-e-device")    { params.openvino_encode_device = argv[++i]; }
         else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             whisper_print_usage(argc, argv, params);
@@ -197,6 +200,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
     fprintf(stderr, "             --prompt PROMPT     [%-7s] initial prompt\n",                                 params.prompt.c_str());
     fprintf(stderr, "  -m FNAME,  --model FNAME       [%-7s] model path\n",                                     params.model.c_str());
     fprintf(stderr, "  -f FNAME,  --file FNAME        [%-7s] input WAV file path\n",                            "");
+    fprintf(stderr, "  -oved D,   --ov-e-device DNAME [%-7s] The OpenVINO device used for encode inference\n",  params.openvino_encode_device.c_str());
     fprintf(stderr, "\n");
 }
 
@@ -713,6 +717,9 @@ int main(int argc, char ** argv) {
         return 3;
     }
 
+    // initialize openvino encoder. This has no effect on whisper.cpp builds that don't have OpenVINO configured.
+    whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
+
     for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
         const auto fname_inp = params.fname_inp[f];
 		const auto fname_out = f < (int) params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f];
diff --git a/openvino/whisper-openvino-encoder.cpp b/openvino/whisper-openvino-encoder.cpp
new file mode 100644
index 00000000000..11aef39dd43
--- /dev/null
+++ b/openvino/whisper-openvino-encoder.cpp
@@ -0,0 +1,108 @@
+#include "openvino/whisper-openvino-encoder.h"
+#include "ggml.h"
+#include <openvino/openvino.hpp>
+#include <iostream>
+
+struct whisper_openvino_context {
+    ov::InferRequest inferRequest;
+};
+
+// Read the encoder IR at path_model, compile it for device, and wrap the resulting infer
+// request in a new context. cache_dir may be null. Returns nullptr on failure.
+struct whisper_openvino_context * whisper_openvino_init(const char* path_model,
+    const char* device,
+    const char* cache_dir)
+{
+    if (!path_model || !device) {
+        fprintf(stderr, "%s: path_model and/or device is null\n", __func__);
+        return nullptr;
+    }
+
+    fprintf(stderr, "%s: path_model = %s, device = %s, cache_dir = %s\n",
+        __func__, path_model, device, cache_dir ? cache_dir : "(not set)");
+
+    whisper_openvino_context *context = new whisper_openvino_context;
+    try {
+        ov::Core core;
+        if (cache_dir) {
+            // cache device-specific 'blobs' so that compile_model is faster on successive runs
+            core.set_property(ov::cache_dir(cache_dir));
+        }
+
+        // Read the OpenVINO encoder IR (.xml/.bin) from disk, producing an ov::Model object.
+        std::shared_ptr<ov::Model> model = core.read_model(path_model);
+
+        // Produce a compiled-model object, given the device ("CPU", "GPU", etc.)
+        auto compiledModel = core.compile_model(model, device);
+
+        // Create the infer request that is used later on to trigger inference execution.
+        context->inferRequest = compiledModel.create_infer_request();
+    }
+    catch (const std::exception& error) {
+        // keep diagnostics on stderr, consistent with the fprintf calls above
+        fprintf(stderr, "in openvino encoder compile routine: exception: %s\n", error.what());
+        delete context;
+        context = nullptr;
+    }
+
+    return context;
+}
+
+// Destroys a context obtained from whisper_openvino_init(); passing null is harmless.
+void whisper_openvino_free(struct whisper_openvino_context * ctx) {
+    // delete on a null pointer is a no-op, so no explicit null check is needed
+    delete ctx;
+}
+
+// Run the OpenVINO encoder on mel and let it write straight into out. Returns 1 on success, 0 on failure.
+int whisper_openvino_encode(
+    whisper_openvino_context* ctx,
+    ggml_tensor* mel,
+    ggml_tensor* out) {
+
+    if (!ctx || !mel || !out) {
+        fprintf(stderr, "%s: Error! ctx / mel / out is null\n", __func__);
+        return 0;
+    }
+
+    if (mel->n_dims != 2) {
+        fprintf(stderr, "%s: Error! mel ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n",
+            __func__, mel->n_dims);
+        return 0;
+    }
+
+    if (out->n_dims != 2) {
+        fprintf(stderr, "%s: Error! out ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n",
+            __func__, out->n_dims);
+        return 0;
+    }
+
+    try {
+        // wrap the passed-in mel ggml_tensor as an OpenVINO Tensor object, and set as input tensor to infer request
+        {
+            // note, we populate shape & stride dimensions in opposite order from how they are listed in ne / nb arrays
+            ov::Shape input_shape = { 1, static_cast<size_t>(mel->ne[1]), static_cast<size_t>(mel->ne[0]) };
+            ov::Strides input_strides = { mel->nb[2], mel->nb[1], mel->nb[0] };
+            ov::Tensor input_tensor(ov::element::f32, input_shape, mel->data, input_strides);
+            ctx->inferRequest.set_input_tensor(input_tensor);
+        }
+
+        // wrap the passed-in out ggml_tensor as an OpenVINO Tensor object, and set as output tensor to infer request
+        {
+            // note, we populate shape & stride dimensions in opposite order from how they are listed in ne / nb arrays
+            ov::Shape output_shape = { 1, static_cast<size_t>(out->ne[1]), static_cast<size_t>(out->ne[0]) };
+            ov::Strides output_strides = { out->nb[2], out->nb[1], out->nb[0] };
+            ov::Tensor out_tensor(ov::element::f32, output_shape, out->data, output_strides);
+            ctx->inferRequest.set_output_tensor(out_tensor);
+        }
+
+        // run inference; the result lands in out->data because out was wrapped in place above
+        ctx->inferRequest.infer();
+    }
+    catch (const std::exception& error) {
+        fprintf(stderr, "in openvino encode inference execution routine: exception: %s\n", error.what());
+        return 0;
+    }
+
+    return 1;
+}
\ No newline at end of file
diff --git a/openvino/whisper-openvino-encoder.h b/openvino/whisper-openvino-encoder.h
new file mode 100644
index 00000000000..7c2f6dfc2e0
--- /dev/null
+++ b/openvino/whisper-openvino-encoder.h
@@ -0,0 +1,31 @@
+// Wrapper of the OpenVINO Whisper Encoder model
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct whisper_openvino_context;
+
+// initialize openvino encoder, given path to model xml, device ("CPU", "GPU", etc.), and
+// path to cache_dir (may be null to disable caching). Returns null upon failure.
+struct whisper_openvino_context * whisper_openvino_init(const char * path_model,
+                                                        const char * device,
+                                                        const char * cache_dir);
+
+// clean up a ctx previously returned from whisper_openvino_init()
+void whisper_openvino_free(struct whisper_openvino_context * ctx);
+
+struct ggml_tensor;
+
+// Perform encode using OpenVINO: mel is the input tensor, out receives the encoder output.
+// Returns 1 on success, 0 on failure. (struct tags are spelled out so the header also parses as C)
+int whisper_openvino_encode(
+    struct whisper_openvino_context* ctx,
+    struct ggml_tensor* mel,
+    struct ggml_tensor* out);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/whisper.cpp b/whisper.cpp
index 0cdd4a1d49f..f1e7e17dd79 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -3,6 +3,10 @@
 #include "coreml/whisper-encoder.h"
 #endif
 
+#ifdef WHISPER_USE_OPENVINO
+#include "openvino/whisper-openvino-encoder.h"
+#endif
+
 #include "ggml.h"
 
 #include <algorithm>
@@ -652,6 +656,10 @@ struct whisper_state {
     whisper_coreml_context * ctx_coreml = nullptr;
 #endif
 
+#ifdef WHISPER_USE_OPENVINO
+    whisper_openvino_context* ctx_openvino = nullptr;
+#endif
+
     // [EXPERIMENTAL] token-level timestamps data
     int64_t t_beg = 0;
     int64_t t_last = 0;
@@ -1463,7 +1471,13 @@ static bool whisper_encode_internal(
     const bool use_coreml = wstate.ctx_coreml != nullptr;
 #endif
 
-    if (!use_coreml) {
+#ifndef WHISPER_USE_OPENVINO
+    const bool use_openvino = false;
+#else
+    const bool use_openvino = wstate.ctx_openvino != nullptr;
+#endif
+
+    if (!use_coreml && !use_openvino) {
         // convolution + gelu
         {
             wstate.use_buf(ctx0, 1);
@@ -1762,8 +1776,7 @@ static bool whisper_encode_internal(
         }
     }
 #ifdef WHISPER_USE_COREML
-    else
-    {
+    else if(use_coreml) {
         wstate.use_buf(ctx0, -1);
 
         cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);
@@ -1771,6 +1784,16 @@ static bool whisper_encode_internal(
         whisper_coreml_encode(wstate.ctx_coreml, (float *) mel->data, (float *) cur->data);
     }
 #endif
+#ifdef WHISPER_USE_OPENVINO
+    else if(use_openvino) {
+        wstate.use_buf(ctx0, -1);
+
+        cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);
+
+        if (!whisper_openvino_encode(wstate.ctx_openvino, mel, cur))
+            return false;
+    }
+#endif
 
     // cur
     //{
@@ -2613,6 +2636,31 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
 }
 #endif
 
+#ifdef WHISPER_USE_OPENVINO
+// replace .bin with-encoder-openvino.xml
+static std::string whisper_get_openvino_path_encoder(std::string path_bin) {
+    auto pos = path_bin.rfind('.');
+    if (pos != std::string::npos) {
+        path_bin = path_bin.substr(0, pos);
+    }
+
+    path_bin += "-encoder-openvino.xml";
+
+    return path_bin;
+}
+
+static std::string whisper_get_openvino_path_cache(std::string path_bin) {
+    auto pos = path_bin.rfind('.');
+    if (pos != std::string::npos) {
+        path_bin = path_bin.substr(0, pos);
+    }
+
+    path_bin += "-encoder-openvino-cache";
+
+    return path_bin;
+}
+#endif
+
 struct whisper_state * whisper_init_state(whisper_context * ctx) {
     whisper_state * state = new whisper_state;
 
@@ -2679,6 +2727,54 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
     return state;
 }
 
+int whisper_ctx_init_openvino_encoder(struct whisper_context* ctx,
+    const char* openvino_model_path,
+    const char* openvino_device,
+    const char* openvino_cache_dir)
+{
+#ifndef WHISPER_USE_OPENVINO
+    return 0;
+#else
+    if (!openvino_model_path && ctx->path_model.empty())
+    {
+        fprintf(stderr, "%s: openvino_model_path is nullptr, and ctx has no model_path set.\n", __func__);
+        return 0;
+    }
+
+    // NOTE(review): ctx->state is presumably null for contexts created by a *_no_state initializer
+    if (!ctx->state) {
+        fprintf(stderr, "%s: ctx has no state to attach the OpenVINO encoder to.\n", __func__);
+        return 0;
+    }
+
+    // if openvino_model_path is not set, attempt to find it in the same directory as ggml-<model>.bin model
+    const std::string path_openvino = openvino_model_path
+        ? std::string(openvino_model_path)
+        : whisper_get_openvino_path_encoder(ctx->path_model);
+
+    // if openvino_cache_dir is not set, set it as a dir residing next to ggml-<model>.bin
+    const std::string path_openvino_cache_dir = openvino_cache_dir
+        ? std::string(openvino_cache_dir)
+        : whisper_get_openvino_path_cache(ctx->path_model);
+
+    fprintf(stderr, "%s: loading OpenVINO model from '%s'\n", __func__, path_openvino.c_str());
+    fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__);
+
+    // free the encoder of a previous call, if any, so that re-initialization does not leak it
+    if (ctx->state->ctx_openvino) {
+        whisper_openvino_free(ctx->state->ctx_openvino);
+    }
+    ctx->state->ctx_openvino = whisper_openvino_init(path_openvino.c_str(), openvino_device, path_openvino_cache_dir.c_str());
+    if (!ctx->state->ctx_openvino) {
+        fprintf(stderr, "%s: failed to init OpenVINO encoder from '%s'\n", __func__, path_openvino.c_str());
+        return 0;
+    }
+
+    fprintf(stderr, "%s: OpenVINO model loaded\n", __func__);
+    return 1;
+#endif
+}
+
 struct whisper_context * whisper_init_from_file_no_state(const char * path_model) {
 
     fprintf(stderr, "%s: loading model from '%s'\n", __func__, path_model);
@@ -2833,6 +2929,13 @@ void whisper_free_state(struct whisper_state * state)
         }
 #endif
 
+#ifdef WHISPER_USE_OPENVINO
+        if (state->ctx_openvino != nullptr) {
+            whisper_openvino_free(state->ctx_openvino);
+            state->ctx_openvino = nullptr;
+        }
+#endif
+
         delete state;
     }
 }
@@ -3268,6 +3371,14 @@ static int whisper_has_coreml(void) {
 #endif
 }
 
+static int whisper_has_openvino(void) {
+#ifdef WHISPER_USE_OPENVINO
+    return 1;
+#else
+    return 0;
+#endif
+}
+
 const char * whisper_print_system_info(void) {
     static std::string s;
 
@@ -3285,6 +3396,7 @@ const char * whisper_print_system_info(void) {
     s += "SSE3 = "      + std::to_string(ggml_cpu_has_sse3())      + " | ";
     s += "VSX = "       + std::to_string(ggml_cpu_has_vsx())       + " | ";
     s += "COREML = "    + std::to_string(whisper_has_coreml())     + " | ";
+    s += "OPENVINO = "  + std::to_string(whisper_has_openvino())   + " | ";
 
     return s.c_str();
 }
diff --git a/whisper.h b/whisper.h
index e983c7d4fa3..7d70117d712 100644
--- a/whisper.h
+++ b/whisper.h
@@ -110,6 +110,23 @@ extern "C" {
 
     WHISPER_API struct whisper_state * whisper_init_state(struct whisper_context * ctx);
 
+    // Given a context, enable use of OpenVINO for encode inference.
+    // openvino_model_path: Optional path to OpenVINO encoder IR model. If set to nullptr,
+    //                      the path will be generated from the ggml model path that was passed
+    //                      in to whisper_init_from_file. For example, if 'path_model' was
+    //                      "/path/to/ggml-base.en.bin", then OpenVINO IR model path will be
+    //                      assumed to be "/path/to/ggml-base.en-encoder-openvino.xml".
+    // openvino_device: OpenVINO device to run inference on ("CPU", "GPU", etc.)
+    // openvino_cache_dir: Optional cache directory that can speed up init time, especially for
+    //                     GPU, by caching compiled 'blobs' there.
+    //                     Set to nullptr if not used.
+    // Returns 1 on success. If OpenVINO is not enabled in build, this
+    // simply returns 0.
+    WHISPER_API int whisper_ctx_init_openvino_encoder(struct whisper_context* ctx,
+        const char* openvino_model_path,
+        const char* openvino_device,
+        const char* openvino_cache_dir);
+
     // Frees all allocated memory
     WHISPER_API void whisper_free      (struct whisper_context * ctx);
     WHISPER_API void whisper_free_state(struct whisper_state * state);

From 93b8be46830214a3b7348fd9fdd81ad0c018fca8 Mon Sep 17 00:00:00 2001
From: "Metcalfe, Ryan" <ryan.metcalfe@intel.com>
Date: Wed, 21 Jun 2023 16:21:55 -0400
Subject: [PATCH 2/9] openvino: add python script for OpenVINO model generation

---
 models/convert-whisper-to-openvino.py       | 53 +++++++++++++++++++++
 models/openvino-conversion-requirements.txt |  2 +
 2 files changed, 55 insertions(+)
 create mode 100644 models/convert-whisper-to-openvino.py
 create mode 100644 models/openvino-conversion-requirements.txt

diff --git a/models/convert-whisper-to-openvino.py b/models/convert-whisper-to-openvino.py
new file mode 100644
index 00000000000..31cf29abdf5
--- /dev/null
+++ b/models/convert-whisper-to-openvino.py
@@ -0,0 +1,53 @@
+import argparse
+import torch
+from whisper import load_model
+import os
+from openvino.tools import mo
+from openvino.runtime import serialize
+import shutil
+
+def convert_encoder(hparams, encoder, mname):
+    """Export `encoder` to ONNX, convert it to OpenVINO IR, write ggml-<mname>-encoder-openvino.xml."""
+    encoder.eval()
+
+    mel = torch.zeros((1, 80, 3000))
+
+    # directory for the onnx model, and other collateral that is saved during onnx export procedure
+    onnx_folder = os.path.join(os.path.dirname(__file__), "onnx_encoder")
+    os.makedirs(onnx_folder, exist_ok=True)
+
+    onnx_path = os.path.join(onnx_folder, "whisper_encoder.onnx")
+
+    try:
+        torch.onnx.export(
+            encoder,
+            mel,
+            onnx_path,
+            input_names=["mel"],
+            output_names=["output_features"]
+        )
+
+        # use model optimizer to convert onnx to OpenVINO IR format
+        encoder_model = mo.convert_model(onnx_path, compress_to_fp16=True)
+        serialize(encoder_model, xml_path='ggml-' + mname + '-encoder-openvino.xml')
+    finally:
+        # cleanup, also when the export or the conversion raised
+        if os.path.isdir(onnx_folder):
+            shutil.rmtree(onnx_folder)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1)", required=True)
+    args = parser.parse_args()
+
+    if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1"]:
+        raise ValueError("Invalid model name")
+
+    whisper = load_model(args.model).cpu()
+    hparams = whisper.dims
+
+    encoder = whisper.encoder
+
+    # Convert encoder to onnx
+    convert_encoder(hparams, encoder, args.model)
diff --git a/models/openvino-conversion-requirements.txt b/models/openvino-conversion-requirements.txt
new file mode 100644
index 00000000000..5bfd95db88e
--- /dev/null
+++ b/models/openvino-conversion-requirements.txt
@@ -0,0 +1,2 @@
+openvino-dev[pytorch,onnx]
+openai-whisper
\ No newline at end of file

From 58eae32d14d1a385df78e1cdb9d542b1887cdd08 Mon Sep 17 00:00:00 2001
From: Ryan Metcalfe <ryan.metcalfe@intel.com>
Date: Thu, 22 Jun 2023 14:38:43 -0400
Subject: [PATCH 3/9] whisper: Fix 'unused' warnings when OpenVINO isn't
 enabled in build

---
 whisper.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/whisper.cpp b/whisper.cpp
index f1e7e17dd79..05ea677869d 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -2733,6 +2733,10 @@ int whisper_ctx_init_openvino_encoder(struct whisper_context* ctx,
     const char* openvino_cache_dir)
 {
 #ifndef WHISPER_USE_OPENVINO
+    (void)(ctx);
+    (void)(openvino_model_path);
+    (void)(openvino_device);
+    (void)(openvino_cache_dir);
     return 0;
 #else
     if (!openvino_model_path && ctx->path_model.empty())

From 4bc1ebcdc5cc738b1a7fff8c8ef7738a9283edd4 Mon Sep 17 00:00:00 2001
From: Ryan Metcalfe <107415876+RyanMetcalfeInt8@users.noreply.github.com>
Date: Wed, 28 Jun 2023 15:18:30 -0400
Subject: [PATCH 4/9] Apply suggestions from code review

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
---
 whisper.cpp |  6 +++---
 whisper.h   | 15 ++++++++-------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/whisper.cpp b/whisper.cpp
index 05ea677869d..291d9092dd0 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -1785,7 +1785,7 @@ static bool whisper_encode_internal(
     }
 #endif
 #ifdef WHISPER_USE_OPENVINO
-    else if(use_openvino) {
+    else if (use_openvino) {
         wstate.use_buf(ctx0, -1);
 
         cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);
@@ -2638,7 +2638,7 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
 
 #ifdef WHISPER_USE_OPENVINO
 // replace .bin with-encoder-openvino.xml
-static std::string whisper_get_openvino_path_encoder(std::string path_bin) {
+static std::string whisper_get_openvino_path_encoder(const std::string & path_bin) {
     auto pos = path_bin.rfind('.');
     if (pos != std::string::npos) {
         path_bin = path_bin.substr(0, pos);
@@ -2649,7 +2649,7 @@ static std::string whisper_get_openvino_path_encoder(std::string path_bin) {
     return path_bin;
 }
 
-static std::string whisper_get_openvino_path_cache(std::string path_bin) {
+static std::string whisper_get_openvino_path_cache(const std::string & path_bin) {
     auto pos = path_bin.rfind('.');
     if (pos != std::string::npos) {
         path_bin = path_bin.substr(0, pos);
diff --git a/whisper.h b/whisper.h
index 7d70117d712..021dbe388ca 100644
--- a/whisper.h
+++ b/whisper.h
@@ -111,21 +111,22 @@ extern "C" {
     WHISPER_API struct whisper_state * whisper_init_state(struct whisper_context * ctx);
 
     // Given a context, enable use of OpenVINO for encode inference.
-    // openvino_model_path: Optional path to OpenVINO encoder IR model. If set to nullptr,
+    // model_path: Optional path to OpenVINO encoder IR model. If set to nullptr,
     //                      the path will be generated from the ggml model path that was passed
     //                      in to whisper_init_from_file. For example, if 'path_model' was
     //                      "/path/to/ggml-base.en.bin", then OpenVINO IR model path will be
     //                      assumed to be "/path/to/ggml-base.en-encoder-openvino.xml".
-    // openvino_device: OpenVINO device to run inference on ("CPU", "GPU", etc.)
-    // openvino_cache_dir: Optional cache directory that can speed up init time, especially for
+    // device: OpenVINO device to run inference on ("CPU", "GPU", etc.)
+    // cache_dir: Optional cache directory that can speed up init time, especially for
     //                     GPU, by caching compiled 'blobs' there.
     //                     Set to nullptr if not used.
     // Returns 1 on success. If OpenVINO is not enabled in build, this
     // simply returns 0.
-    WHISPER_API int whisper_ctx_init_openvino_encoder(struct whisper_context* ctx,
-        const char* openvino_model_path,
-        const char* openvino_device,
-        const char* openvino_cache_dir);
+    WHISPER_API int whisper_ctx_init_openvino_encoder(
+        struct whisper_context * ctx,
+                    const char * model_path,
+                    const char * device,
+                    const char * cache_dir);
 
     // Frees all allocated memory
     WHISPER_API void whisper_free      (struct whisper_context * ctx);

From 6bfa37112ff82295542ec2bf23f9dfafaae972fd Mon Sep 17 00:00:00 2001
From: "Metcalfe, Ryan" <ryan.metcalfe@intel.com>
Date: Wed, 28 Jun 2023 15:56:47 -0400
Subject: [PATCH 5/9] whisper: Fix compilation error

---
 whisper.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/whisper.cpp b/whisper.cpp
index 291d9092dd0..6fcc598fdad 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -2639,25 +2639,27 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
 #ifdef WHISPER_USE_OPENVINO
 // replace .bin with-encoder-openvino.xml
 static std::string whisper_get_openvino_path_encoder(const std::string & path_bin) {
+    std::string openvino_path_encoder;
     auto pos = path_bin.rfind('.');
     if (pos != std::string::npos) {
-        path_bin = path_bin.substr(0, pos);
+        openvino_path_encoder = path_bin.substr(0, pos);
     }
 
-    path_bin += "-encoder-openvino.xml";
+    openvino_path_encoder += "-encoder-openvino.xml";
 
-    return path_bin;
+    return openvino_path_encoder;
 }
 
 static std::string whisper_get_openvino_path_cache(const std::string & path_bin) {
+    std::string openvino_path_cache;
     auto pos = path_bin.rfind('.');
     if (pos != std::string::npos) {
-        path_bin = path_bin.substr(0, pos);
+        openvino_path_cache = path_bin.substr(0, pos);
     }
 
-    path_bin += "-encoder-openvino-cache";
+    openvino_path_cache += "-encoder-openvino-cache";
 
-    return path_bin;
+    return openvino_path_cache;
 }
 #endif
 

From df77368fb54d99678342af43e708ef7cf4af7e85 Mon Sep 17 00:00:00 2001
From: "Metcalfe, Ryan" <ryan.metcalfe@intel.com>
Date: Wed, 28 Jun 2023 16:13:09 -0400
Subject: [PATCH 6/9] whisper: revert whisper_get_openvino_path_encoder &
 whisper_get_openvino_path_cache to non-const func signatures

---
 whisper.cpp | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/whisper.cpp b/whisper.cpp
index 6fcc598fdad..d7940a1b087 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -2638,28 +2638,26 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
 
 #ifdef WHISPER_USE_OPENVINO
 // replace .bin with-encoder-openvino.xml
-static std::string whisper_get_openvino_path_encoder(const std::string & path_bin) {
-    std::string openvino_path_encoder;
+static std::string whisper_get_openvino_path_encoder(std::string path_bin) {
     auto pos = path_bin.rfind('.');
     if (pos != std::string::npos) {
-        openvino_path_encoder = path_bin.substr(0, pos);
+        path_bin = path_bin.substr(0, pos);
     }
 
-    openvino_path_encoder += "-encoder-openvino.xml";
+    path_bin += "-encoder-openvino.xml";
 
-    return openvino_path_encoder;
+    return path_bin;
 }
 
-static std::string whisper_get_openvino_path_cache(const std::string & path_bin) {
-    std::string openvino_path_cache;
+static std::string whisper_get_openvino_path_cache(std::string path_bin) {
     auto pos = path_bin.rfind('.');
     if (pos != std::string::npos) {
-        openvino_path_cache = path_bin.substr(0, pos);
+        path_bin = path_bin.substr(0, pos);
     }
 
-    openvino_path_cache += "-encoder-openvino-cache";
+    path_bin += "-encoder-openvino-cache";
 
-    return openvino_path_cache;
+    return path_bin;
 }
 #endif
 

From 76c41863933432b5f378c938cb27bacdc6b43912 Mon Sep 17 00:00:00 2001
From: "Metcalfe, Ryan" <ryan.metcalfe@intel.com>
Date: Wed, 28 Jun 2023 16:52:22 -0400
Subject: [PATCH 7/9] cmake: Add openvino-encoder as separate object target

---
 CMakeLists.txt | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e844e5af31c..4115e4d31e2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -304,8 +304,21 @@ if (WHISPER_COREML)
 endif()
 
 if (WHISPER_OPENVINO)
-    set( OpenVINO_SOURCES openvino/whisper-openvino-encoder.h openvino/whisper-openvino-encoder.cpp )
+    set(TARGET whisper.openvino)
+
+    add_library(${TARGET} OBJECT
+        openvino/whisper-openvino-encoder.h
+        openvino/whisper-openvino-encoder.cpp
+        )
+
+    target_include_directories(${TARGET} PUBLIC
+        .
+        )
+
+    set_property(TARGET ${TARGET} PROPERTY POSITION_INDEPENDENT_CODE ON)
     set( WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_OPENVINO)
+
+    target_link_libraries(${TARGET} PRIVATE openvino::runtime)
 endif()
 
 #
@@ -321,7 +334,6 @@ add_library(${TARGET}
     ${GGML_OPENCL_SOURCES}
     whisper.h
     whisper.cpp
-    ${OpenVINO_SOURCES}
     )
 
 include(DefaultTargetOptions)
@@ -335,7 +347,7 @@ if (WHISPER_COREML)
 endif()
 
 if (WHISPER_OPENVINO)
-    target_link_libraries(${TARGET} PRIVATE openvino::runtime)
+    target_link_libraries(${TARGET} PRIVATE whisper.openvino)
 endif()
 
 if (MSVC)

From bc5746e877474ca25a07c1e2c443ac8a77699365 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Tue, 4 Jul 2023 15:37:38 +0300
Subject: [PATCH 8/9] whisper : minor style fixes

---
 whisper.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/whisper.cpp b/whisper.cpp
index d7940a1b087..c4359a3ffef 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -657,7 +657,7 @@ struct whisper_state {
 #endif
 
 #ifdef WHISPER_USE_OPENVINO
-    whisper_openvino_context* ctx_openvino = nullptr;
+    whisper_openvino_context * ctx_openvino = nullptr;
 #endif
 
     // [EXPERIMENTAL] token-level timestamps data
@@ -1776,7 +1776,7 @@ static bool whisper_encode_internal(
         }
     }
 #ifdef WHISPER_USE_COREML
-    else if(use_coreml) {
+    else if (use_coreml) {
         wstate.use_buf(ctx0, -1);
 
         cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);
@@ -1790,8 +1790,9 @@ static bool whisper_encode_internal(
 
         cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);
 
-        if (!whisper_openvino_encode(wstate.ctx_openvino, mel, cur))
+        if (!whisper_openvino_encode(wstate.ctx_openvino, mel, cur)) {
             return false;
+        }
     }
 #endif
 

From df982879fcdb1988e6f6b746fa6f61c16163ff95 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Tue, 4 Jul 2023 15:41:22 +0300
Subject: [PATCH 9/9] minor : indentation fixes

---
 CMakeLists.txt         | 2 +-
 examples/main/main.cpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c1bc35f611d..88021e01470 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -316,7 +316,7 @@ if (WHISPER_OPENVINO)
         )
 
     set_property(TARGET ${TARGET} PROPERTY POSITION_INDEPENDENT_CODE ON)
-    set( WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_OPENVINO)
+    set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_OPENVINO)
 
     target_link_libraries(${TARGET} PRIVATE openvino::runtime)
 endif()
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 5dcd09b05ac..9a68367186d 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -157,7 +157,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
         else if (                  arg == "--prompt")          { params.prompt          = argv[++i]; }
         else if (arg == "-m"    || arg == "--model")           { params.model           = argv[++i]; }
         else if (arg == "-f"    || arg == "--file")            { params.fname_inp.emplace_back(argv[++i]); }
-        else if (arg == "-oved" || arg == "--ov-e-device")    { params.openvino_encode_device = argv[++i]; }
+        else if (arg == "-oved" || arg == "--ov-e-device")     { params.openvino_encode_device = argv[++i]; }
         else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             whisper_print_usage(argc, argv, params);
@@ -210,7 +210,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
     fprintf(stderr, "             --prompt PROMPT     [%-7s] initial prompt\n",                                 params.prompt.c_str());
     fprintf(stderr, "  -m FNAME,  --model FNAME       [%-7s] model path\n",                                     params.model.c_str());
     fprintf(stderr, "  -f FNAME,  --file FNAME        [%-7s] input WAV file path\n",                            "");
-    fprintf(stderr, "  -oved D,   --ov-e-device DNAME [%-7s] The OpenVINO device used for encode inference\n",  params.openvino_encode_device.c_str());
+    fprintf(stderr, "  -oved D,   --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n",  params.openvino_encode_device.c_str());
     fprintf(stderr, "\n");
 }