Skip to content

Commit d8a3ddb

Browse files
committed
Update llama.cpp
1 parent 985d559 commit d8a3ddb

File tree

2 files changed

+3
-1
lines changed

2 files changed

+3
-1
lines changed

llama_cpp/llama_cpp.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ class llama_token_data_array(Structure):
163163
# int32_t n_ctx; // text context
164164
# int32_t n_batch; // prompt processing batch size
165165
# int32_t n_gqa; // grouped-query attention (TEMP - will be moved to model hparams)
166+
# float rms_norm_eps; // rms norm epsilon (TEMP - will be moved to model hparams)
166167
# int32_t n_gpu_layers; // number of layers to store in VRAM
167168
# int32_t main_gpu; // the GPU that is used for scratch and small tensors
168169
#
@@ -193,6 +194,7 @@ class llama_context_params(Structure):
193194
("n_ctx", c_int32),
194195
("n_batch", c_int32),
195196
("n_gqa", c_int32),
197+
("rms_norm_eps", c_float),
196198
("n_gpu_layers", c_int32),
197199
("main_gpu", c_int32),
198200
("tensor_split", POINTER(c_float)),

vendor/llama.cpp

0 commit comments

Comments (0)