@@ -817,29 +817,26 @@ sycl::event stable_argsort_axis1_contig_impl(
817
817
size_t sorted_block_size =
818
818
(sort_nelems >= 512 ) ? 512 : determine_automatically;
819
819
820
- sycl::buffer<IndexTy, 1 > index_data (
821
- sycl::range<1 >(iter_nelems * sort_nelems));
820
+ const size_t total_nelems = iter_nelems * sort_nelems;
822
821
823
822
sycl::event populate_indexed_data_ev =
824
823
exec_q.submit ([&](sycl::handler &cgh) {
825
824
cgh.depends_on (depends);
826
- sycl::accessor acc (index_data, cgh, sycl::write_only,
827
- sycl::no_init);
828
825
829
- auto const & range = index_data. get_range () ;
826
+ const sycl:: range< 1 > range{total_nelems} ;
830
827
831
828
using KernelName =
832
829
populate_index_data_krn<argTy, IndexTy, ValueComp>;
833
830
834
831
cgh.parallel_for <KernelName>(range, [=](sycl::id<1 > id) {
835
832
size_t i = id[0 ];
836
- acc [i] = static_cast <IndexTy>(i);
833
+ res_tp [i] = static_cast <IndexTy>(i);
837
834
});
838
835
});
839
836
840
837
// Sort segments of the array
841
838
sycl::event base_sort_ev = sort_detail::sort_over_work_group_contig_impl (
842
- exec_q, iter_nelems, sort_nelems, index_data , res_tp, index_comp,
839
+ exec_q, iter_nelems, sort_nelems, res_tp , res_tp, index_comp,
843
840
sorted_block_size, // modified in place with size of sorted block size
844
841
{populate_indexed_data_ev});
845
842
@@ -856,9 +853,11 @@ sycl::event stable_argsort_axis1_contig_impl(
856
853
857
854
using KernelName = index_map_to_rows_krn<argTy, IndexTy, ValueComp>;
858
855
859
- cgh.parallel_for <KernelName>(
860
- index_data.get_range (),
861
- [=](sycl::id<1 > id) { res_tp[id] = (temp_acc[id] % sort_nelems); });
856
+ const sycl::range<1 > range{total_nelems};
857
+
858
+ cgh.parallel_for <KernelName>(range, [=](sycl::id<1 > id) {
859
+ res_tp[id] = (temp_acc[id] % sort_nelems);
860
+ });
862
861
});
863
862
864
863
return write_out_ev;
0 commit comments