@@ -332,242 +332,6 @@ void copy_numpy_ndarray_into_usm_ndarray(
     return;
 }
 
-void copy_numpy_ndarray_into_usm_ndarray_legacy(
-    const py::array &npy_src,
-    const dpctl::tensor::usm_ndarray &dst,
-    sycl::queue &exec_q,
-    const std::vector<sycl::event> &depends)
-{
-    int src_ndim = npy_src.ndim();
-    int dst_ndim = dst.get_ndim();
-
-    if (src_ndim != dst_ndim) {
-        throw py::value_error("Source ndarray and destination usm_ndarray have "
-                              "different array ranks, "
-                              "i.e. different number of indices needed to "
-                              "address array elements.");
-    }
-
-    const py::ssize_t *src_shape = npy_src.shape();
-    const py::ssize_t *dst_shape = dst.get_shape_raw();
-    bool shapes_equal(true);
-    size_t src_nelems(1);
-    for (int i = 0; i < src_ndim; ++i) {
-        shapes_equal = shapes_equal && (src_shape[i] == dst_shape[i]);
-        src_nelems *= static_cast<size_t>(src_shape[i]);
-    }
-
-    if (!shapes_equal) {
-        throw py::value_error("Source ndarray and destination usm_ndarray have "
-                              "difference shapes.");
-    }
-
-    if (src_nelems == 0) {
-        // nothing to do
-        return;
-    }
-
-    dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst, src_nelems);
-
-    if (!dpctl::utils::queues_are_compatible(exec_q, {dst})) {
-        throw py::value_error("Execution queue is not compatible with the "
-                              "allocation queue");
-    }
-
-    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst);
-
-    // here we assume that NumPy's type numbers agree with ours for types
-    // supported in both
-    int src_typenum =
-        py::detail::array_descriptor_proxy(npy_src.dtype().ptr())->type_num;
-    int dst_typenum = dst.get_typenum();
-
-    auto array_types = td_ns::usm_ndarray_types();
-    int src_type_id = array_types.typenum_to_lookup_id(src_typenum);
-    int dst_type_id = array_types.typenum_to_lookup_id(dst_typenum);
-
-    py::buffer_info src_pybuf = npy_src.request();
-    const char *const src_data = static_cast<const char *const>(src_pybuf.ptr);
-    char *dst_data = dst.get_data();
-
-    int src_flags = npy_src.flags();
-
-    // check for applicability of special cases:
-    //     (same type && (both C-contiguous || both F-contiguous)
-    const bool both_c_contig =
-        ((src_flags & py::array::c_style) && dst.is_c_contiguous());
-    const bool both_f_contig =
-        ((src_flags & py::array::f_style) && dst.is_f_contiguous());
-
-    const bool same_data_types = (src_type_id == dst_type_id);
-
-    if (both_c_contig || both_f_contig) {
-        if (same_data_types) {
-            int src_elem_size = npy_src.itemsize();
-
-            sycl::event copy_ev =
-                exec_q.memcpy(static_cast<void *>(dst_data),
-                              static_cast<const void *>(src_data),
-                              src_nelems * src_elem_size, depends);
-
-            {
-                // wait for copy_ev to complete
-                // release GIL to allow other threads (host_tasks)
-                // a chance to acquire GIL
-                py::gil_scoped_release lock{};
-                copy_ev.wait();
-            }
-
-            return;
-        }
-    }
-
-    auto const &dst_strides =
-        dst.get_strides_vector(); // N.B.: strides in elements
-
-    using shT = std::vector<py::ssize_t>;
-    shT simplified_shape;
-    shT simplified_src_strides;
-    shT simplified_dst_strides;
-    py::ssize_t src_offset(0);
-    py::ssize_t dst_offset(0);
-
-    int nd = src_ndim;
-    const py::ssize_t *shape = src_shape;
-
-    const py::ssize_t *src_strides_p =
-        npy_src.strides(); // N.B.: strides in bytes
-    py::ssize_t src_itemsize = npy_src.itemsize(); // item size in bytes
-
-    bool is_src_c_contig = ((src_flags & py::array::c_style) != 0);
-    bool is_src_f_contig = ((src_flags & py::array::f_style) != 0);
-
-    shT src_strides_in_elems;
-    if (src_strides_p) {
-        src_strides_in_elems.resize(nd);
-        // copy and convert strides from bytes to elements
-        std::transform(
-            src_strides_p, src_strides_p + nd, std::begin(src_strides_in_elems),
-            [src_itemsize](py::ssize_t el) {
-                py::ssize_t q = el / src_itemsize;
-                if (q * src_itemsize != el) {
-                    throw std::runtime_error(
-                        "NumPy array strides are not multiple of itemsize");
-                }
-                return q;
-            });
-    }
-    else {
-        if (is_src_c_contig) {
-            src_strides_in_elems =
-                dpctl::tensor::c_contiguous_strides(nd, src_shape);
-        }
-        else if (is_src_f_contig) {
-            src_strides_in_elems =
-                dpctl::tensor::f_contiguous_strides(nd, src_shape);
-        }
-        else {
-            throw py::value_error("NumPy source array has null strides but is "
-                                  "neither C- nor F-contiguous.");
-        }
-    }
-
-    // nd, simplified_* vectors and offsets are modified by reference
-    simplify_iteration_space(nd, shape, src_strides_in_elems, dst_strides,
-                             // outputs
-                             simplified_shape, simplified_src_strides,
-                             simplified_dst_strides, src_offset, dst_offset);
-
-    assert(simplified_shape.size() == static_cast<size_t>(nd));
-    assert(simplified_src_strides.size() == static_cast<size_t>(nd));
-    assert(simplified_dst_strides.size() == static_cast<size_t>(nd));
-
-    // handle nd == 0
-    if (nd == 0) {
-        nd = 1;
-        simplified_shape.reserve(nd);
-        simplified_shape.push_back(1);
-
-        simplified_src_strides.reserve(nd);
-        simplified_src_strides.push_back(1);
-
-        simplified_dst_strides.reserve(nd);
-        simplified_dst_strides.push_back(1);
-    }
-
-    const bool can_use_memcpy =
-        (same_data_types && (nd == 1) && (src_offset == 0) &&
-         (dst_offset == 0) && (simplified_src_strides[0] == 1) &&
-         (simplified_dst_strides[0] == 1));
-
-    if (can_use_memcpy) {
-        int src_elem_size = npy_src.itemsize();
-
-        sycl::event copy_ev = exec_q.memcpy(
-            static_cast<void *>(dst_data), static_cast<const void *>(src_data),
-            src_nelems * src_elem_size, depends);
-
-        {
-            // wait for copy_ev to complete
-            // release GIL to allow other threads (host_tasks)
-            // a chance to acquire GIL
-            py::gil_scoped_release lock{};
-
-            copy_ev.wait();
-        }
-
-        return;
-    }
-
-    // Minimum and maximum element offsets for source np.ndarray
-    py::ssize_t npy_src_min_nelem_offset(src_offset);
-    py::ssize_t npy_src_max_nelem_offset(src_offset);
-    for (int i = 0; i < nd; ++i) {
-        if (simplified_src_strides[i] < 0) {
-            npy_src_min_nelem_offset +=
-                simplified_src_strides[i] * (simplified_shape[i] - 1);
-        }
-        else {
-            npy_src_max_nelem_offset +=
-                simplified_src_strides[i] * (simplified_shape[i] - 1);
-        }
-    }
-
-    std::vector<sycl::event> host_task_events;
-    host_task_events.reserve(1);
-
-    // Copy shape strides into device memory
-    using dpctl::tensor::offset_utils::device_allocate_and_pack;
-    const auto &ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t>(
-        exec_q, host_task_events, simplified_shape, simplified_src_strides,
-        simplified_dst_strides);
-    py::ssize_t *shape_strides = std::get<0>(ptr_size_event_tuple);
-    if (shape_strides == nullptr) {
-        throw std::runtime_error("Unable to allocate device memory");
-    }
-    const sycl::event &copy_shape_ev = std::get<2>(ptr_size_event_tuple);
-
-    {
-        // release GIL for the blocking call
-        py::gil_scoped_release lock{};
-
-        // Get implementation function pointer
-        auto copy_and_cast_from_host_blocking_fn =
-            copy_and_cast_from_host_blocking_dispatch_table[dst_type_id]
-                                                           [src_type_id];
-
-        copy_and_cast_from_host_blocking_fn(
-            exec_q, src_nelems, nd, shape_strides, src_data, src_offset,
-            npy_src_min_nelem_offset, npy_src_max_nelem_offset, dst_data,
-            dst_offset, depends, {copy_shape_ev});
-
-        using dpctl::tensor::alloc_utils::sycl_free_noexcept;
-        sycl_free_noexcept(shape_strides, exec_q);
-    }
-
-    return;
-}
-
 void init_copy_numpy_ndarray_into_usm_ndarray_dispatch_tables(void)
 {
     using namespace td_ns;