Skip to content

Commit bd52b27

Browse files
Use n_vecs = 2 for minimum/maximum to work around a bug in the CPU driver for AMD hardware
1 parent 63c82fc commit bd52b27

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -206,7 +206,7 @@ sycl::event maximum_contig_impl(sycl::queue &exec_q,
206206
{
207207
using resTy = typename MaximumOutputType<argTy1, argTy2>::value_type;
208208
constexpr auto vec_sz = VecSize_v<argTy1, argTy2, resTy>;
209-
constexpr unsigned int n_vecs = 1u;
209+
constexpr unsigned int n_vecs = 2u;
210210

211211
return elementwise_common::binary_contig_impl<
212212
argTy1, argTy2, MaximumOutputType, MaximumContigFunctor,

dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -206,7 +206,7 @@ sycl::event minimum_contig_impl(sycl::queue &exec_q,
206206
{
207207
using resTy = typename MinimumOutputType<argTy1, argTy2>::value_type;
208208
constexpr auto vec_sz = VecSize_v<argTy1, argTy2, resTy>;
209-
constexpr unsigned int n_vecs = 1u;
209+
constexpr unsigned int n_vecs = 2u;
210210

211211
return elementwise_common::binary_contig_impl<
212212
argTy1, argTy2, MinimumOutputType, MinimumContigFunctor,

0 commit comments

Comments
 (0)