diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp
index 437ece2d4a3cf..6751652c9519f 100755
--- a/ggml/src/ggml-cann/aclnn_ops.cpp
+++ b/ggml/src/ggml-cann/aclnn_ops.cpp
@@ -714,6 +714,16 @@ static void cann_copy(ggml_backend_cann_context& ctx, aclTensor* acl_src,
 void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src0 = dst->src[0];
 
+    // ---- TODO: ----
+    // if (ctx.cann_graph && ctx.cann_graph->use_cpy_indirection) {
+    //     int &idx = ctx.cann_graph->graph_cpynode_index;  // 引用，方便自增
+    //     if (idx < ctx.cann_graph->dest_ptrs_size && ctx.cann_graph->dest_ptrs_d != nullptr) {
+    //         dst->data = ggml_cann_get_dynamic_dst_ptr(ctx, idx);
+    //         idx++;
+    //     } else {
+    //         std::cerr << "Warning: graph_cpynode_index out of range or dest_ptrs_d null!" << std::endl;
+    //     }
+    // }
     aclTensor* acl_src = ggml_cann_create_tensor(src0);
     aclTensor* acl_dst = ggml_cann_create_tensor(dst);
     if (ggml_are_same_shape(src0, dst)) {
diff --git a/ggml/src/ggml-cann/common.h b/ggml/src/ggml-cann/common.h
index 7ef80a4793314..416117088943c 100755
--- a/ggml/src/ggml-cann/common.h
+++ b/ggml/src/ggml-cann/common.h
@@ -333,6 +333,58 @@ class cann_task_queue {
     int32_t device_;
 };
 
+// TODO: 删除 //
+// #if defined(GGML_CANN_USE_GRAPHS)
+#define USE_CANN_GRAPH
+// #endif
+
+struct ggml_graph_node_properties {
+    void * node_address;
+    ggml_op node_op;
+    int64_t ne[GGML_MAX_DIMS];
+    size_t nb[GGML_MAX_DIMS];
+    void * src_address[GGML_MAX_SRC];
+    int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
+};
+
+struct ggml_cann_graph {
+#ifdef USE_CANN_GRAPH
+    ~ggml_cann_graph() {
+        // if (instance != nullptr) {
+        //     aclmdlRIDestroy(instance);
+        // }
+        if (graph != nullptr) {
+            aclmdlRIDestroy(graph);
+        }
+    }
+
+    aclmdlRI graph = nullptr;
+    // aclmdlRI instance = nullptr;
+
+    size_t num_nodes = 0;
+
+    // std::vector<aclGraphNode*> nodes;
+    std::vector<aclopAttr*> op_attrs;
+    std::vector<aclTensor*> input_tensors;
+    std::vector<aclTensor*> output_tensors;
+
+    // bool disable_due_to_npu_arch = false;
+    bool disable_due_to_too_many_updates = false;
+    // bool disable_due_to_failed_graph_capture = false;
+    int number_consecutive_updates = 0;
+
+    std::vector<ggml_graph_node_properties> ggml_graph_properties;
+
+    // TODO: user cpy indirection
+    bool use_cpy_indirection = false;
+    std::vector<char *> cpy_dest_ptrs;
+    char ** dest_ptrs_d = nullptr;
+    int dest_ptrs_size = 0;
+
+    int graph_cpynode_index = -1;
+#endif  // USE_CANN_GRAPH
+};
+
 /**
  * @brief Context for managing CANN backend operations.
  */
@@ -341,6 +393,7 @@ struct ggml_backend_cann_context {
     std::string name;                /**< Name of the device. */
     std::string description;         /**< Description of the device. */
     aclrtEvent copy_event = nullptr; /**< Event for managing copy operations. */
+    std::unique_ptr<ggml_cann_graph> cann_graph;
     cann_task_queue task_queue;
     bool async_mode;
 
diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp
index c0ea26002196f..7be7ea1cda1d9 100755
--- a/ggml/src/ggml-cann/ggml-cann.cpp
+++ b/ggml/src/ggml-cann/ggml-cann.cpp
@@ -1943,6 +1943,255 @@ static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
     ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
 }
 
+#ifdef USE_CANN_GRAPH
+
+void ggml_cann_cpy_dest_ptrs_copy(ggml_cann_graph * cann_graph, char ** host_dest_ptrs, const int host_dest_ptrs_size, aclrtStream stream) {
+#ifdef USE_CANN_GRAPH
+    if (host_dest_ptrs_size == 0 || host_dest_ptrs == nullptr) {
+        return;
+    }
+
+    if (cann_graph->dest_ptrs_size < host_dest_ptrs_size) { // (re-)allocate NPU memory for destination pointers
+        ACL_CHECK(aclrtSynchronizeStream(stream));
+        if (cann_graph->dest_ptrs_d != nullptr) {
+            std::cout << "lcg ggml_cann_cpy_dest_ptrs_copy0" << std::endl;
+            ACL_CHECK(aclrtFree(cann_graph->dest_ptrs_d));
+        }
+        // TODO: check ACL_MEM_MALLOC_NORMAL_ONLY
+        char * device_ptr = nullptr;
+        ACL_CHECK(aclrtMalloc((void **)&device_ptr, host_dest_ptrs_size*sizeof(char *), ACL_MEM_MALLOC_NORMAL_ONLY));
+        cann_graph->dest_ptrs_d = (char **) device_ptr;
+        cann_graph->dest_ptrs_size = host_dest_ptrs_size;
+    }
+    // copy destination pointers to NPU
+    // cann_graph->dest_ptrs_d   (Device Pointer, 存在 device 上)
+    //           │
+    //           │
+    //           ▼
+    //   +---------------------+
+    //   | 0x600000000         |  <-- device copy address1
+    //   +---------------------+
+    //   | 0x600010000         |  <-- device copy address2
+    //   +---------------------+
+    //   | 0x600020000         |  <-- device copy address3
+    //   +---------------------+
+    ACL_CHECK(aclrtMemcpy(cann_graph->dest_ptrs_d, host_dest_ptrs_size*sizeof(char *), host_dest_ptrs, host_dest_ptrs_size*sizeof(char *),
+                              ACL_MEMCPY_HOST_TO_DEVICE));
+                              
+    ACL_CHECK(aclrtSynchronizeStream(stream));
+    cann_graph->graph_cpynode_index = 0; // reset index
+#else
+    GGML_UNUSED(cann_graph); GGML_UNUSED(host_dest_ptrs);
+    GGML_UNUSED(host_dest_ptrs_size); GGML_UNUSED(stream);
+#endif
+}
+
+static bool check_node_graph_compatibility_and_refresh_copy_ops(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph,
+    bool use_cann_graph) {
+    // Loop over nodes in GGML graph to obtain info needed for CANN graph
+    cann_ctx->cann_graph->cpy_dest_ptrs.clear();
+
+    for (int i = 0; i < cgraph->n_nodes; i++) {
+        ggml_tensor * node = cgraph->nodes[i];
+
+        if (ggml_is_empty(node) || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
+            continue;
+        }
+
+        if (node->op == GGML_OP_MUL_MAT_ID && node->ne[2] != 1) {
+            // std::cout << "lcg check_node_graph_compatibility_and_refresh_copy_ops start1:" << std::endl;
+            use_cann_graph = false; // This node type is not supported by CANN graph capture
+#ifndef NDEBUG
+            GGML_LOG_DEBUG("%s: disabling CANN graphs due to unsupported node type\n", __func__);
+#endif
+        }
+
+//         if (node->op == GGML_OP_ADD && node->src[1] && node->src[1]->ne[1] > 1) {
+//             // disable CANN graphs for batch size > 1 for now.
+//             // Changes in batch size or context size can cause changes to the grid size of some kernels.
+//             use_cann_graph = false;
+// #ifndef NDEBUG
+//             GGML_LOG_DEBUG("%s: disabling CANN graphs due to batch size > 1 [%s] [%ld %ld %ld %ld]\n", __func__, node->name, node->ne[0], node->ne[1], node->ne[2], node->ne[3]);
+// #endif
+//         }
+        if (node->op == GGML_OP_CPY) {
+            // Store the pointers which are updated for each token, such that these can be sent
+            // to the device and accessed using indirection from CANN graph
+            cann_ctx->cann_graph->cpy_dest_ptrs.push_back((char *) node->src[1]->data);
+        }
+        
+        if (!use_cann_graph) {
+            break;
+        }
+    }
+
+    if (use_cann_graph) {
+        cann_ctx->cann_graph->use_cpy_indirection = true;
+        // copy pointers to NPU so they can be accessed via indirection within CANN graph
+        ggml_cann_cpy_dest_ptrs_copy(cann_ctx->cann_graph.get(), cann_ctx->cann_graph->cpy_dest_ptrs.data(), cann_ctx->cann_graph->cpy_dest_ptrs.size(), cann_ctx->stream());
+    }
+
+    return use_cann_graph;
+}
+
+static void set_ggml_graph_node_properties(ggml_tensor * node, ggml_graph_node_properties * graph_node_properties) {
+    graph_node_properties->node_address = node->data;
+    graph_node_properties->node_op = node->op;
+    for (int i = 0; i < GGML_MAX_DIMS; i++) {
+        graph_node_properties->ne[i] = node->ne[i];
+        graph_node_properties->nb[i] = node->nb[i];
+    }
+    for (int i = 0; i < GGML_MAX_SRC; i++) {
+        graph_node_properties->src_address[i] = node->src[i] ? node->src[i]->data : nullptr;
+    }
+    memcpy(graph_node_properties->op_params, node->op_params, GGML_MAX_OP_PARAMS);
+}
+
+static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_graph_node_properties * graph_node_properties) {
+    if (node->data != graph_node_properties->node_address &&
+          node->op != GGML_OP_CPY &&
+          node->op != GGML_OP_VIEW) {
+        return false;
+    }
+
+    if (node->op != graph_node_properties->node_op) {
+        return false;
+    }
+
+    for (int i = 0; i < GGML_MAX_DIMS; i++) {
+        if (node->ne[i] != graph_node_properties->ne[i]) {
+            return false;
+        }
+        if (node->nb[i] != graph_node_properties->nb[i]) {
+            return false;
+        }
+    }
+
+    for (int i = 0; i < GGML_MAX_SRC; i++) {
+        if (node->src[i] &&
+            node->src[i]->data != graph_node_properties->src_address[i] &&
+            node->op != GGML_OP_CPY &&
+            node->op != GGML_OP_VIEW
+        ) {
+            return false;
+        }
+    }
+
+    if (node->op == GGML_OP_SCALE &&
+        memcmp(graph_node_properties->op_params, node->op_params, GGML_MAX_OP_PARAMS) != 0) {
+        return false;
+    }
+
+    return true;
+}
+
+static bool is_cann_graph_update_required(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) {
+    bool cann_graph_update_required = false;
+
+    if (cann_ctx->cann_graph->graph == nullptr) {
+        cann_graph_update_required = true;
+    }
+
+    // Check if the graph size has changed
+    if (cann_ctx->cann_graph->ggml_graph_properties.size() != (size_t)cgraph->n_nodes) {
+        cann_graph_update_required = true;
+        cann_ctx->cann_graph->ggml_graph_properties.resize(cgraph->n_nodes);
+    }
+
+    // Loop over nodes in GGML graph to determine if CANN graph update is required
+    // and store properties to allow this comparison for the next token
+    for (int i = 0; i < cgraph->n_nodes; i++) {
+        bool has_matching_properties = true;
+        if (!cann_graph_update_required) {
+            // std::cout << "lcg is_cann_graph_update_required start3:" << std::endl;
+            has_matching_properties = ggml_graph_node_has_matching_properties(cgraph->nodes[i], &cann_ctx->cann_graph->ggml_graph_properties[i]);
+        }
+        if (!has_matching_properties) {
+            // std::cout << "lcg is_cann_graph_update_required start4:" << std::endl;
+            cann_graph_update_required = true;
+        }
+        set_ggml_graph_node_properties(cgraph->nodes[i], &cann_ctx->cann_graph->ggml_graph_properties[i]);
+    }
+
+    return cann_graph_update_required;
+}
+
+static void update_cann_graph_executable(ggml_backend_cann_context * cann_ctx) {
+    if (cann_ctx->cann_graph->graph != nullptr) {
+        // Destroy the existing graph instance
+        ACL_CHECK(aclmdlRIDestroy(cann_ctx->cann_graph->graph));
+        cann_ctx->cann_graph->graph = nullptr;
+    }
+
+    // Begin graph capture
+    ACL_CHECK(aclmdlRICaptureBegin(cann_ctx->stream(), ACL_MODEL_RI_CAPTURE_MODE_GLOBAL));
+
+    // TODO: Run the GGML graph (this will record operations into the graph)
+    // ggml_cann_compute_forward();
+
+    // // End graph capture and obtain the new graph instance
+    // aclrtGraph new_graph = nullptr;
+    // ACL_CHECK(aclmdlRICaptureEnd(cann_ctx->stream(), ACL_MODEL_RI_CAPTURE_MODE_GLOBAL));
+
+    // cann_ctx->cann_graph->graph = new_graph;
+#ifndef NDEBUG
+    GGML_LOG_DEBUG("%s: CANN graph recreated and updated.\n", __func__);
+#endif
+}
+
+#endif  // USE_CANN_GRAPH
+
+static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph,
+    bool & graph_evaluated_or_captured, bool & use_cann_graph, bool & cann_graph_update_required) {
+    // std::cout << "lcg evaluate_and_capture_cann_graph start:" << use_cann_graph << "," << cann_graph_update_required << std::endl;
+    while (!graph_evaluated_or_captured) {
+#ifdef USE_CANN_GRAPH
+        // Start CANN graph capture
+        if (use_cann_graph && cann_graph_update_required) {
+            if (cann_ctx->cann_graph->graph != nullptr) {
+                ACL_CHECK(aclmdlRIDestroy(cann_ctx->cann_graph->graph));
+                cann_ctx->cann_graph->graph = nullptr;
+            }
+            ACL_CHECK(aclmdlRICaptureBegin(cann_ctx->stream(), ACL_MODEL_RI_CAPTURE_MODE_GLOBAL));
+        }
+#endif  // USE_CANN_GRAPH
+
+        // Only perform the graph execution if CANN graphs are not enabled, or we are capturing the graph.
+        // With the use of CANN graphs, the execution will be performed by the graph launch.
+        if (!use_cann_graph || cann_graph_update_required) {
+            for (int i = 0; i < cgraph->n_nodes; i++) {
+                ggml_tensor * node = cgraph->nodes[i];
+
+                if (ggml_is_empty(node) || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
+                    continue;
+                }
+
+                bool ok = ggml_cann_compute_forward(*cann_ctx, node);
+                if (!ok) {
+                    GGML_LOG_ERROR("%s: op not supported %s (%s)\n", __func__, node->name, ggml_op_name(node->op));
+                }
+                GGML_ASSERT(ok);
+            }
+        }
+
+#ifdef USE_CANN_GRAPH
+        if (use_cann_graph && cann_graph_update_required) { // End CANN graph capture
+            ACL_CHECK(aclmdlRICaptureEnd(cann_ctx->stream(), &cann_ctx->cann_graph->graph));
+            graph_evaluated_or_captured = true; // CANN graph has been captured
+        } else {
+            graph_evaluated_or_captured = true; // ggml graph has been directly evaluated
+        }
+    }
+
+    if (use_cann_graph) {
+        // Launch graph
+        ACL_CHECK(aclmdlRIExecuteAsync(cann_ctx->cann_graph->graph, cann_ctx->stream()));
+#else
+        graph_evaluated_or_captured = true;
+#endif  // USE_CANN_GRAPH
+    }
+}
+
 /**
  * @brief Computes a computational graph using a CANN backend.
  *
@@ -1955,29 +2204,53 @@ static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
  * @return enum ggml_status Returns GGML_STATUS_SUCCESS if computation
  *         completes successfully, otherwise an appropriate error status.
  */
-static enum ggml_status ggml_backend_cann_graph_compute(
-    ggml_backend_t backend, ggml_cgraph* cgraph) {
-    ggml_backend_cann_context* cann_ctx =
-        (ggml_backend_cann_context*)backend->context;
-
+static enum ggml_status ggml_backend_cann_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
+    ggml_backend_cann_context * cann_ctx = (ggml_backend_cann_context *)backend->context;
     ggml_cann_set_device(cann_ctx->device);
 
-    for (int i = 0; i < cgraph->n_nodes; i++) {
-        ggml_tensor* node = cgraph->nodes[i];
+#ifdef USE_CANN_GRAPH
+    bool use_cann_graph = true;
+    bool cann_graph_update_required = true;
 
-        if (ggml_is_empty(node) || node->op == GGML_OP_NONE) {
-            continue;
-        }
+    static const bool disable_cann_graphs_due_to_env = (getenv("GGML_CANN_DISABLE_GRAPHS") != nullptr);
+    if (disable_cann_graphs_due_to_env) {
+        use_cann_graph = false;
+    }
 
-        bool ok = ggml_cann_compute_forward(*cann_ctx, node);
+    // Objects required for CANN Graph
+    if (cann_ctx->cann_graph == nullptr) {
+        cann_ctx->cann_graph.reset(new ggml_cann_graph());
+    }
+    
+    if (use_cann_graph) {
+        cann_graph_update_required = is_cann_graph_update_required(cann_ctx, cgraph);
 
-        if (!ok) {
-            GGML_LOG_ERROR("%s: error: op not supported %s (%s)\n", __func__,
-                    node->name, ggml_op_name(node->op));
+        use_cann_graph = check_node_graph_compatibility_and_refresh_copy_ops(cann_ctx, cgraph, use_cann_graph);
+
+        // Disable CANN graphs (from the next token) if the use-case is demanding too many consecutive graph updates.
+        if (use_cann_graph && cann_graph_update_required) {
+            cann_ctx->cann_graph->number_consecutive_updates++;
+        } else {
+            cann_ctx->cann_graph->number_consecutive_updates = 0;
         }
-        GGML_ASSERT(ok);
+
+//         if (cann_ctx->cann_graph->number_consecutive_updates >= 4) {
+//             cann_ctx->cann_graph->disable_due_to_too_many_updates = true;
+// #ifndef NDEBUG
+//             GGML_LOG_DEBUG("%s: disabling CANN graphs due to too many consecutive updates\n", __func__);
+// #endif
+//         }
     }
 
+#else
+    bool use_cann_graph = false;
+    bool cann_graph_update_required = false;
+#endif // USE_CANN_GRAPH
+
+    bool graph_evaluated_or_captured = false;
+
+    evaluate_and_capture_cann_graph(cann_ctx, cgraph, graph_evaluated_or_captured, use_cann_graph, cann_graph_update_required);
+
     return GGML_STATUS_SUCCESS;
 }