From da53236bf3953509b716feac8458ad1d5a90b212 Mon Sep 17 00:00:00 2001
From: JohannesGaessler
Date: Tue, 8 Aug 2023 19:05:10 +0200
Subject: [PATCH 1/3] database writing works

---
 Makefile               |  2 +-
 examples/main/main.cpp | 37 +++++++++++++++++++++++++++++++++++++
 llama.cpp              |  9 +++++++++
 llama.h                |  2 ++
 4 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 897c5cb9abcca..8772107ea123e 100644
--- a/Makefile
+++ b/Makefile
@@ -47,7 +47,7 @@ OPT = -O3
 endif
 CFLAGS = -I. $(OPT) -std=c11 -fPIC
 CXXFLAGS = -I. -I./examples $(OPT) -std=c++11 -fPIC
-LDFLAGS =
+LDFLAGS = -lsqlite3
 
 ifdef LLAMA_DEBUG
 CFLAGS += -O0 -g
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 56ada7e69d99d..fb4c828bc7ee9 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -17,8 +17,10 @@
 #include <ctime>
 #include <fstream>
 #include <iostream>
+#include <sstream>
 #include <string>
 #include <vector>
+#include <sqlite3.h>
 
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
 #include <signal.h>
@@ -163,6 +165,30 @@ int main(int argc, char ** argv) {
         return 0;
     }
 
+    sqlite3 * db = NULL;
+    int return_code;
+    const int flags = SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE;
+    return_code = sqlite3_open_v2("llama.sqlite", &db, flags, NULL);
+    fprintf(stderr, "\nsqlite open: %d %s\n\n", return_code, sqlite3_errmsg(db));
+
+    const std::string sql_create_table ="CREATE TABLE IF NOT EXISTS llama_runs("
+        "id INTEGER PRIMARY KEY AUTOINCREMENT,"
+        "build_number INTEGER NOT NULL,"
+        "build_commit TEXT NOT NULL,"
+
+        "n_gpu_layers BIGINT NOT NULL,"
+
+        "t_sample_us BIGINT NOT NULL,"
+        "t_eval_us BIGINT NOT NULL,"
+        "t_p_eval_us BIGINT NOT NULL,"
+        "n_sample BIGINT NOT NULL,"
+        "n_eval BIGINT NOT NULL,"
+        "n_p_eval BIGINT NOT NULL);";
+
+    char * errmsg;
+    return_code = sqlite3_exec(db, sql_create_table.c_str(), NULL, NULL, &errmsg);
+    fprintf(stderr, "\nsqlite create table: %d %s\n\n", return_code, errmsg);
+
     std::string path_session = params.path_prompt_cache;
     std::vector<llama_token> session_tokens;
 
@@ -808,6 +834,17 @@ int main(int argc, char ** argv) {
     }
 
     llama_print_timings(ctx);
+
+    std::ostringstream sql_insert_values;
+    sql_insert_values << "INSERT INTO llama_runs(build_number, build_commit, n_gpu_layers, "
+        "t_sample_us, t_eval_us, t_p_eval_us, n_sample, n_eval, n_p_eval) VALUES (";
+    sql_insert_values << BUILD_NUMBER << ",";
+    sql_insert_values << "'" << BUILD_COMMIT << "',";
+    sql_insert_values << params.n_gpu_layers << ",";
+    llama_sqlite_append_timings(ctx, sql_insert_values);
+    return_code = sqlite3_exec(db, sql_insert_values.str().c_str(), NULL, NULL, &errmsg);
+    fprintf(stderr, "\nsqlite insert data: %d %s\n\n", return_code, errmsg);
+
     if (ctx_guidance) { llama_free(ctx_guidance); }
     llama_free(ctx);
     llama_free_model(model);
diff --git a/llama.cpp b/llama.cpp
index 71061aab910ef..a9256121b7070 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4243,6 +4243,15 @@ void llama_print_timings(struct llama_context * ctx) {
     fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
 }
 
+void llama_sqlite_append_timings(struct llama_context * ctx, std::ostringstream & sql_insert_values) {
+    sql_insert_values << ctx->t_sample_us << ",";
+    sql_insert_values << ctx->t_eval_us << ",";
+    sql_insert_values << ctx->t_p_eval_us << ",";
+    sql_insert_values << ctx->n_sample << ",";
+    sql_insert_values << ctx->n_eval << ",";
+    sql_insert_values << ctx->n_p_eval << ");";
+}
+
 void llama_reset_timings(struct llama_context * ctx) {
     ctx->t_start_us = ggml_time_us();
     ctx->t_sample_us = ctx->n_sample = 0;
diff --git a/llama.h b/llama.h
index fa1977f2d9492..8585942efc91c 100644
--- a/llama.h
+++ b/llama.h
@@ -8,6 +8,7 @@
 #else
 #define LLAMA_MAX_DEVICES 1
 #endif // GGML_USE_CUBLAS
+#include <sstream>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdbool.h>
@@ -446,6 +447,7 @@ extern "C" {
     // Performance information
     LLAMA_API struct llama_timings llama_get_timings(struct llama_context * ctx);
     LLAMA_API void llama_print_timings(struct llama_context * ctx);
+    LLAMA_API void llama_sqlite_append_timings(struct llama_context * ctx, std::ostringstream & sql_insert_values);
     LLAMA_API void llama_reset_timings(struct llama_context * ctx);
 
     // Print system information

From 97c809448ff106ca8084a8775f14e2f8af71f325 Mon Sep 17 00:00:00 2001
From: JohannesGaessler
Date: Tue, 8 Aug 2023 19:34:31 +0200
Subject: [PATCH 2/3] add plotting files

---
 benchmark.sh       |  6 ++++++
 plot_ts_per_ngl.py | 23 +++++++++++++++++++++++
 2 files changed, 29 insertions(+)
 create mode 100755 benchmark.sh
 create mode 100644 plot_ts_per_ngl.py

diff --git a/benchmark.sh b/benchmark.sh
new file mode 100755
index 0000000000000..67d628698ed25
--- /dev/null
+++ b/benchmark.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+for ngl in {0..35}
+do
+    ./main --model models/nvme/llama-7b-ggml-q4_0.bin --seed 1337 --ignore-eos --n-predict 128 --ctx-size 2048 --threads 8 -ngl $ngl -mmq
+done
diff --git a/plot_ts_per_ngl.py b/plot_ts_per_ngl.py
new file mode 100644
index 0000000000000..b922cee7bbcf9
--- /dev/null
+++ b/plot_ts_per_ngl.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+
+import sqlite3
+import matplotlib.pyplot as plt
+
+con = sqlite3.connect("llama.sqlite")
+cur = con.cursor()
+
+ts = []
+
+for ngl in range(0, 36):
+    res = cur.execute(f"SELECT t_eval_us,n_eval FROM llama_runs WHERE n_gpu_layers={ngl};")
+    t_eval_us, n_eval = res.fetchone()
+    ts.append(n_eval * 1000000/t_eval_us)
+
+plt.plot(ts)
+plt.xlim(0, 35)
+plt.ylim(0, 130)
+plt.title("7b q4_0, 3700X, 3200 MHz dual-channel RAM, RTX 3090")
+plt.xlabel("-ngl")
+plt.ylabel("Generated t/s")
+plt.savefig("benchmark.png", dpi=240)
+plt.show()

From 829565b13dbb59411e22c89a5de50636a26c701c Mon Sep 17 00:00:00 2001
From: JohannesGaessler
Date: Wed, 9 Aug 2023 01:31:26 +0200
Subject: [PATCH 3/3] better SQL

---
 plot_ts_per_ngl.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/plot_ts_per_ngl.py b/plot_ts_per_ngl.py
index b922cee7bbcf9..de056b0ed256c 100644
--- a/plot_ts_per_ngl.py
+++ b/plot_ts_per_ngl.py
@@ -1,19 +1,16 @@
 #!/usr/bin/env python3
 
 import sqlite3
+import numpy as np
 import matplotlib.pyplot as plt
 
 con = sqlite3.connect("llama.sqlite")
 cur = con.cursor()
 
-ts = []
+res = cur.execute("SELECT n_gpu_layers, 1000000.0*n_eval/t_eval_us FROM llama_runs ORDER BY n_gpu_layers;")
+ts = np.array(res.fetchall())
 
-for ngl in range(0, 36):
-    res = cur.execute(f"SELECT t_eval_us,n_eval FROM llama_runs WHERE n_gpu_layers={ngl};")
-    t_eval_us, n_eval = res.fetchone()
-    ts.append(n_eval * 1000000/t_eval_us)
-
-plt.plot(ts)
+plt.plot(ts[:, 0], ts[:, 1])
 plt.xlim(0, 35)
 plt.ylim(0, 130)
 plt.title("7b q4_0, 3700X, 3200 MHz dual-channel RAM, RTX 3090")