@@ -200,18 +200,18 @@ py_accumulate_over_axis(const dpctl::tensor::usm_ndarray &src,
200
200
}
201
201
202
202
using dpctl::tensor::offset_utils::device_allocate_and_pack;
203
- const auto & ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t >(
203
+ auto ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t >(
204
204
exec_q, host_task_events, simplified_iter_shape,
205
205
simplified_iter_src_strides, simplified_iter_dst_strides, acc_shape,
206
206
acc_src_strides, acc_dst_strides);
207
- py::ssize_t *packed_shapes_and_strides = std::get<0 >(ptr_size_event_tuple);
208
- if (packed_shapes_and_strides == nullptr ) {
209
- throw std::runtime_error (" Unexpected error" );
210
- }
207
+ auto packed_shapes_and_strides_owner =
208
+ std::move (std::get<0 >(ptr_size_event_tuple));
211
209
const auto &copy_shapes_strides_ev = std::get<2 >(ptr_size_event_tuple);
210
+ const py::ssize_t *packed_shapes_and_strides =
211
+ packed_shapes_and_strides_owner.get ();
212
212
213
- py::ssize_t *iter_shape_and_strides = packed_shapes_and_strides;
214
- py::ssize_t *acc_shapes_and_strides =
213
+ const py::ssize_t *iter_shape_and_strides = packed_shapes_and_strides;
214
+ const py::ssize_t *acc_shapes_and_strides =
215
215
packed_shapes_and_strides + 3 * simplified_iter_shape.size ();
216
216
217
217
std::vector<sycl::event> all_deps;
@@ -224,14 +224,8 @@ py_accumulate_over_axis(const dpctl::tensor::usm_ndarray &src,
224
224
iter_shape_and_strides, iter_src_offset, iter_dst_offset, acc_nd,
225
225
acc_shapes_and_strides, dst_data, host_task_events, all_deps);
226
226
227
- sycl::event temp_cleanup_ev = exec_q.submit ([&](sycl::handler &cgh) {
228
- cgh.depends_on (acc_ev);
229
- const auto &ctx = exec_q.get_context ();
230
- using dpctl::tensor::alloc_utils::sycl_free_noexcept;
231
- cgh.host_task ([ctx, packed_shapes_and_strides] {
232
- sycl_free_noexcept (packed_shapes_and_strides, ctx);
233
- });
234
- });
227
+ sycl::event temp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free (
228
+ exec_q, {acc_ev}, packed_shapes_and_strides_owner);
235
229
host_task_events.push_back (temp_cleanup_ev);
236
230
237
231
return std::make_pair (
@@ -384,18 +378,18 @@ std::pair<sycl::event, sycl::event> py_accumulate_final_axis_include_initial(
384
378
}
385
379
386
380
using dpctl::tensor::offset_utils::device_allocate_and_pack;
387
- const auto & ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t >(
381
+ auto ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t >(
388
382
exec_q, host_task_events, simplified_iter_shape,
389
383
simplified_iter_src_strides, simplified_iter_dst_strides, acc_shape,
390
384
acc_src_strides, acc_dst_strides);
391
- py::ssize_t *packed_shapes_and_strides = std::get<0 >(ptr_size_event_tuple);
392
- if (packed_shapes_and_strides == nullptr ) {
393
- throw std::runtime_error (" Unexpected error" );
394
- }
385
+ auto packed_shapes_and_strides_owner =
386
+ std::move (std::get<0 >(ptr_size_event_tuple));
395
387
const auto &copy_shapes_strides_ev = std::get<2 >(ptr_size_event_tuple);
388
+ const py::ssize_t *packed_shapes_and_strides =
389
+ packed_shapes_and_strides_owner.get ();
396
390
397
- py::ssize_t *iter_shape_and_strides = packed_shapes_and_strides;
398
- py::ssize_t *acc_shapes_and_strides =
391
+ const py::ssize_t *iter_shape_and_strides = packed_shapes_and_strides;
392
+ const py::ssize_t *acc_shapes_and_strides =
399
393
packed_shapes_and_strides + 3 * simplified_iter_shape.size ();
400
394
401
395
std::vector<sycl::event> all_deps;
@@ -408,14 +402,8 @@ std::pair<sycl::event, sycl::event> py_accumulate_final_axis_include_initial(
408
402
iter_shape_and_strides, iter_src_offset, iter_dst_offset, acc_nd,
409
403
acc_shapes_and_strides, dst_data, host_task_events, all_deps);
410
404
411
- sycl::event temp_cleanup_ev = exec_q.submit ([&](sycl::handler &cgh) {
412
- cgh.depends_on (acc_ev);
413
- const auto &ctx = exec_q.get_context ();
414
- using dpctl::tensor::alloc_utils::sycl_free_noexcept;
415
- cgh.host_task ([ctx, packed_shapes_and_strides] {
416
- sycl_free_noexcept (packed_shapes_and_strides, ctx);
417
- });
418
- });
405
+ sycl::event temp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free (
406
+ exec_q, {acc_ev}, packed_shapes_and_strides_owner);
419
407
host_task_events.push_back (temp_cleanup_ev);
420
408
421
409
return std::make_pair (
0 commit comments