Skip to content

Commit ff0d4ea

Browse files
Merge pull request #1782 from IntelPython/add-usm-memory-constructor-from-existing-allocation
Add usm memory constructor from existing allocation
2 parents 1a789e7 + 65230a4 commit ff0d4ea

File tree

6 files changed

+129
-5
lines changed

6 files changed

+129
-5
lines changed

dpctl/apis/include/dpctl4pybind11.hpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
#include "dpctl_capi.h"
2929
#include <complex>
30+
#include <exception>
3031
#include <memory>
3132
#include <pybind11/pybind11.h>
3233
#include <sycl/sycl.hpp>
@@ -748,6 +749,53 @@ class usm_memory : public py::object
748749
throw py::error_already_set();
749750
}
750751

752+
/*! @brief Create usm_memory object from shared pointer that manages
753+
* lifetime of the USM allocation.
754+
*/
755+
usm_memory(void *usm_ptr,
756+
size_t nbytes,
757+
const sycl::queue &q,
758+
std::shared_ptr<void> shptr)
759+
{
760+
auto const &api = ::dpctl::detail::dpctl_capi::get();
761+
DPCTLSyclUSMRef usm_ref = reinterpret_cast<DPCTLSyclUSMRef>(usm_ptr);
762+
auto q_uptr = std::make_unique<sycl::queue>(q);
763+
DPCTLSyclQueueRef QRef =
764+
reinterpret_cast<DPCTLSyclQueueRef>(q_uptr.get());
765+
766+
auto vacuous_destructor = []() {};
767+
py::capsule mock_owner(vacuous_destructor);
768+
769+
// create memory object owned by mock_owner, it is a new reference
770+
PyObject *_memory =
771+
api.Memory_Make_(usm_ref, nbytes, QRef, mock_owner.ptr());
772+
auto ref_count_decrementer = [](PyObject *o) noexcept { Py_DECREF(o); };
773+
774+
using py_uptrT =
775+
std::unique_ptr<PyObject, decltype(ref_count_decrementer)>;
776+
777+
if (!_memory) {
778+
throw py::error_already_set();
779+
}
780+
781+
auto memory_uptr = py_uptrT(_memory, ref_count_decrementer);
782+
std::shared_ptr<void> *opaque_ptr = new std::shared_ptr<void>(shptr);
783+
784+
Py_MemoryObject *memobj = reinterpret_cast<Py_MemoryObject *>(_memory);
785+
// replace mock_owner capsule as the owner
786+
memobj->refobj = Py_None;
787+
// set opaque ptr field, usm_memory now knowns that USM is managed
788+
// by smart pointer
789+
memobj->_opaque_ptr = reinterpret_cast<void *>(opaque_ptr);
790+
791+
// _memory will delete created copies of sycl::queue, and
792+
// std::shared_ptr and the deleter of the shared_ptr<void> is
793+
// supposed to free the USM allocation
794+
m_ptr = _memory;
795+
q_uptr.release();
796+
memory_uptr.release();
797+
}
798+
751799
sycl::queue get_queue() const
752800
{
753801
Py_MemoryObject *mem_obj = reinterpret_cast<Py_MemoryObject *>(m_ptr);

examples/pybind11/external_usm_allocation/README.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
# Exposing USM Allocations Made by the Native Code to dpctl
22

3-
This extension demonstrates how a Python object backed by
3+
This extension demonstrates how a Python object representing
44
a native class, which allocates USM memory, can expose it
5-
to the `dpctl.memory` entities using `__sycl_usm_array_interface__`.
5+
to the `dpctl.memory` entities using `__sycl_usm_array_interface__`,
6+
and how to create `dpctl.memory` object from allocation made
7+
in native extension.
68

79

810
## Building
@@ -29,4 +31,10 @@ shared
2931
[1.0, 1.0, 0.0, 2.0, 2.0]
3032
[0.0, 0.0, 0.0, 3.0, -1.0]
3133
[0.0, 0.0, 0.0, -1.0, 5.0]
34+
35+
========================================
36+
device
37+
64
38+
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
39+
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
3240
```

examples/pybind11/external_usm_allocation/example.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,10 @@
5050
list_of_lists = matr.tolist()
5151
for row in list_of_lists:
5252
print(row)
53+
54+
print("====" * 10)
55+
56+
mbuf = eua.make_zeroed_device_memory(4 * 16, q)
57+
print(mbuf.get_usm_type())
58+
print(mbuf.nbytes)
59+
print(mbuf.copy_to_host())

examples/pybind11/external_usm_allocation/external_usm_allocation/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616

1717
# coding: utf-8
1818

19-
from ._external_usm_alloc import DMatrix
19+
from ._external_usm_alloc import DMatrix, make_zeroed_device_memory
2020

21-
__all__ = ["DMatrix"]
21+
__all__ = ["DMatrix", "make_zeroed_device_memory"]
2222

2323
__doc__ = """
2424
Example of implementing C++ class with its own USM memory allocation logic

examples/pybind11/external_usm_allocation/external_usm_allocation/_usm_alloc_example.cpp

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,12 @@
3131
/// content of the object into list of lists of Python floats.
3232
///
3333
//===----------------------------------------------------------------------===//
34-
#include <CL/sycl.hpp>
34+
#include <sycl/sycl.hpp>
3535

3636
#include "dpctl4pybind11.hpp"
3737
#include "pybind11/pybind11.h"
3838
#include "pybind11/stl.h"
39+
#include <memory>
3940

4041
namespace py = pybind11;
4142

@@ -120,6 +121,26 @@ py::list tolist(DMatrix &m)
120121
return rows;
121122
}
122123

124+
dpctl::memory::usm_memory make_zeroed_device_memory(size_t nbytes,
125+
sycl::queue &q)
126+
{
127+
char *data = sycl::malloc_device<char>(nbytes, q);
128+
q.memset(data, 0, nbytes).wait();
129+
130+
const sycl::context &ctx = q.get_context();
131+
auto _deleter = [ctx](void *ptr) {
132+
try {
133+
::sycl::free(ptr, ctx);
134+
} catch (const std::exception &e) {
135+
std::cout << "Call to sycl::free caught an exception: " << e.what()
136+
<< std::endl;
137+
}
138+
};
139+
auto shptr = std::shared_ptr<void>(data, _deleter);
140+
141+
return dpctl::memory::usm_memory(data, nbytes, q, shptr);
142+
}
143+
123144
PYBIND11_MODULE(_external_usm_alloc, m)
124145
{
125146
py::class_<DMatrix> dm(m, "DMatrix");
@@ -128,4 +149,7 @@ PYBIND11_MODULE(_external_usm_alloc, m)
128149
dm.def_property("__sycl_usm_array_interface__", &construct_sua_iface,
129150
nullptr);
130151
dm.def("tolist", &tolist, "Return matrix a Python list of lists");
152+
153+
m.def("make_zeroed_device_memory", &make_zeroed_device_memory,
154+
"Returns zero-initialized USM-device allocation created C++");
131155
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Data Parallel Control (dpctl)
2+
#
3+
# Copyright 2020-2024 Intel Corporation
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
# coding: utf-8
18+
19+
import external_usm_allocation as eua
20+
21+
import dpctl
22+
import dpctl.memory as dpm
23+
import dpctl.tensor as dpt
24+
25+
26+
def test_direct():
27+
q = dpctl.SyclQueue()
28+
29+
nb = 2 * 30
30+
mbuf = eua.make_zeroed_device_memory(nb, q)
31+
32+
assert isinstance(mbuf, dpm.MemoryUSMDevice)
33+
assert mbuf.nbytes == 2 * 30
34+
assert mbuf.sycl_queue == q
35+
36+
x = dpt.usm_ndarray(30, dtype="i2", buffer=mbuf)
37+
assert dpt.all(x == dpt.zeros(30, dtype="i2", sycl_queue=q))

0 commit comments

Comments
 (0)