diff --git a/README.md b/README.md
index e371c44ed1b6b..ab89ca3e9ceac 100644
--- a/README.md
+++ b/README.md
@@ -245,6 +245,128 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 | [CANN](docs/build.md#cann) | Ascend NPU |
 | [OpenCL](docs/backend/OPENCL.md) | Adreno GPU |
 
+## Software architecture
+```mermaid
+block-beta
+columns 1
+
+block:llamacpp
+  llamacpp["llama_cpp"]
+  style llamacpp fill:#3c3,color:#000,stroke:#000
+end
+
+block:ggml_backend
+  ggml_backend["GGML backends"]
+  style ggml_backend fill:#3c3,color:#000,stroke:#000
+
+  block:ggmlbackends
+    ggml_cpu["ggml-cpu"]
+    ggml_metal["ggml-metal"]
+    ggml_sycl["ggml-sycl"]
+    ggml_cuda["ggml-cuda"]
+    ggml_hip["ggml-hip"]
+    ggml_vulkan["ggml-vulkan"]
+    ggml_cann["ggml-cann"]
+    ggml_opencl["ggml-opencl"]
+    ggml_hexagon["ggml-hexagon"]
+    ggml_nnpa["ggml-nnpa"]
+    ggml_ane["ggml-ane"]
+
+    style ggml_cpu fill:#888,color:#000,stroke:#000
+    style ggml_metal fill:#888,color:#000,stroke:#000
+    style ggml_sycl fill:#888,color:#000,stroke:#000
+    style ggml_cuda fill:#888,color:#000,stroke:#000
+    style ggml_hip fill:#888,color:#000,stroke:#000
+    style ggml_vulkan fill:#888,color:#000,stroke:#000
+    style ggml_cann fill:#888,color:#000,stroke:#000
+
+    style ggml_opencl fill:#cc3,color:#000,stroke:#000
+    style ggml_hexagon fill:#cc3,color:#000,stroke:#000
+    style ggml_ane fill:#fff,color:#000,stroke:#f00,stroke-width:2,stroke-dasharray:5
+    style ggml_nnpa fill:#cc3,color:#000,stroke:#000
+  end
+end
+
+block:ggml_pal
+  ggml_pal["Platform Abstraction Layer"]
+  style ggml_pal fill:#c33,color:#000,stroke:#000
+end
+
+block:ggml_backendsubsystem
+  ggml_backendsubsystem["GGML backend subsystem"]
+  style ggml_backendsubsystem fill:#3c3,color:#000,stroke:#000
+end
+
+block:group1:2
+  columns 2
+  block:ggml_tensor
+    ggml_tensor["GGML tensor"]
+    style ggml_tensor fill:#3c3,color:#000,stroke:#000
+  end
+
+  block:ggml_cgraph
+    ggml_cgraph["GGML cgraph"]
+    style ggml_cgraph fill:#3c3,color:#000,stroke:#000
+  end
+end
+
+block:OS
+  Windows
+  Linux
+  Android
+  QNX
+end
+
+block:hardware_vendors
+  Intel
+  AMD
+  Apple
+  Nvidia
+  Huawei
+  Loongson
+  Qualcomm
+  IBM
+
+  ggml_metal --> Apple
+  ggml_cuda --> Nvidia
+  ggml_hip --> AMD
+  ggml_cann --> Huawei
+  ggml_sycl --> Intel
+  ggml_opencl --> Qualcomm
+  ggml_hexagon --> Qualcomm
+  ggml_ane --> Apple
+  ggml_nnpa --> IBM
+end
+
+block:hardware_types
+  CPU
+  GPU
+  NPU
+end
+
+block:hardware_archs
+  x86
+  arm
+  riscv
+  dsp
+  loongson
+end
+```
+
+```mermaid
+%%{init: {"flowchart": {"htmlLabels": false, "nodeSpacing": 30, "rankSpacing": 30}} }%%
+flowchart LR
+  classDef EXIST fill:#888,color:#000,stroke:#000
+  classDef DONE fill:#3c3,color:#000,stroke:#000
+  classDef WIP fill:#cc3,color:#000,stroke:#000
+  classDef TODO fill:#c33,color:#000,stroke:#000
+  classDef NEW fill:#fff,color:#000,stroke:#f00,stroke-width:2,stroke-dasharray:5
+  subgraph Legend
+    direction LR
+    EXIST:::EXIST ~~~ TODO:::TODO ~~~ WIP:::WIP ~~~ DONE:::DONE ~~~ NEW:::NEW
+  end
+```
+
 ## Building the project
 
 The main product of this project is the `llama` library. Its C-style interface can be found in [include/llama.h](include/llama.h).