From 564a05daf25486e89872b7e7b5c92ce76c56eb31 Mon Sep 17 00:00:00 2001 From: Neo Zhang Jianyu Date: Tue, 8 Apr 2025 10:29:41 +0800 Subject: [PATCH 1/4] =?UTF-8?q?Revert=20"sycl:=20remove=20redundant=20memc?= =?UTF-8?q?opy=20in=20function=20ggml=5Fbackend=5Fsycl=5Fbuffer=5Fs?= =?UTF-8?q?=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 518a01480eb3a7c80a4951b430db9dee55428310. --- ggml/src/ggml-sycl/ggml-sycl.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index f6f288d990639..dff9f8d4c4ac2 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -372,9 +372,12 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer, auto stream = &(dpct::dev_mgr::instance().get_device(ctx->device).default_queue()); SYCL_CHECK( CHECK_TRY_ERROR(dpct::dev_mgr::instance().get_device(ctx->device).queues_wait_and_throw())); + char* host_buf = (char*)malloc(size); + memcpy(host_buf, data, size); SYCL_CHECK( - CHECK_TRY_ERROR((*stream).memcpy((char *)tensor->data + offset, data, size) + CHECK_TRY_ERROR((*stream).memcpy((char *)tensor->data + offset, host_buf, size) .wait())); + free(host_buf); } catch (sycl::exception const &exc) { std::cerr << exc.what() << "Exception caught at file:" << __FILE__ From d271172ab1769bcf1ddf1cbf4179d0fe1014e94f Mon Sep 17 00:00:00 2001 From: Neo Zhang Jianyu Date: Tue, 8 Apr 2025 10:32:18 +0800 Subject: [PATCH 2/4] Update ggml/src/ggml-sycl/ggml-sycl.cpp --- ggml/src/ggml-sycl/ggml-sycl.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index dff9f8d4c4ac2..a01e92b1b784e 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -372,6 +372,7 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer, auto stream = &(dpct::dev_mgr::instance().get_device(ctx->device).default_queue()); SYCL_CHECK( CHECK_TRY_ERROR(dpct::dev_mgr::instance().get_device(ctx->device).queues_wait_and_throw())); + //note: use host buffer to save the data from mmap(), then copy to device. It's workaround for mmap() issue on PVC GPU. char* host_buf = (char*)malloc(size); memcpy(host_buf, data, size); SYCL_CHECK( From 76f2ed3d77048a9328daa7e696da156980cd17a6 Mon Sep 17 00:00:00 2001 From: Neo Zhang Jianyu Date: Tue, 8 Apr 2025 13:16:14 +0800 Subject: [PATCH 3/4] Update ggml/src/ggml-sycl/ggml-sycl.cpp --- ggml/src/ggml-sycl/ggml-sycl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index a01e92b1b784e..bddb3c857a1a5 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -372,7 +372,8 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer, auto stream = &(dpct::dev_mgr::instance().get_device(ctx->device).default_queue()); SYCL_CHECK( CHECK_TRY_ERROR(dpct::dev_mgr::instance().get_device(ctx->device).queues_wait_and_throw())); - //note: use host buffer to save the data from mmap(), then copy to device. It's workaround for mmap() issue on PVC GPU. + // Note: Use host buffer to save the data from mmap(), then copy to device. It's workaround for mmap() issue on PVC GPU. + // This function will be called during load model from disk. Use memory buffer replace dynamic won't save more time and brings potential memory leak risk here. char* host_buf = (char*)malloc(size); memcpy(host_buf, data, size); SYCL_CHECK( From e9e1882d2dcda1cd6e18d691124be1dca9499a9e Mon Sep 17 00:00:00 2001 From: Neo Zhang Jianyu Date: Tue, 8 Apr 2025 13:43:11 +0800 Subject: [PATCH 4/4] rm tail space --- ggml/src/ggml-sycl/ggml-sycl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index bddb3c857a1a5..89715eaea0753 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -372,7 +372,7 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer, auto stream = &(dpct::dev_mgr::instance().get_device(ctx->device).default_queue()); SYCL_CHECK( CHECK_TRY_ERROR(dpct::dev_mgr::instance().get_device(ctx->device).queues_wait_and_throw())); - // Note: Use host buffer to save the data from mmap(), then copy to device. It's workaround for mmap() issue on PVC GPU. + // Note: Use host buffer to save the data from mmap(), then copy to device. It's workaround for mmap() issue on PVC GPU. // This function will be called during load model from disk. Use memory buffer replace dynamic won't save more time and brings potential memory leak risk here. char* host_buf = (char*)malloc(size); memcpy(host_buf, data, size);