Skip to content

Commit b5b4e67

Browse files
committed
Test commit tweaking gemm `k`-threading kernel parameters
This is a less drastic measure than removing the `k`-threading kernel.
1 parent: 3762f28 · commit: b5b4e67

File tree

1 file changed

+16
-16
lines changed
  • dpctl/tensor/libtensor/include/kernels/linalg_functions

1 file changed

+16
-16
lines changed

dpctl/tensor/libtensor/include/kernels/linalg_functions/gemm.hpp

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1068,7 +1068,7 @@ sycl::event gemm_impl(sycl::queue &exec_q,
10681068
constexpr size_t m_groups = 1;
10691069
size_t delta_k(4);
10701070
size_t n_wi(64);
1071-
size_t delta_n(32);
1071+
size_t delta_n(16);
10721072

10731073
gemm_detail::scale_gemm_k_parameters<resTy, m_groups>(
10741074
local_mem_size, reserved_slm_size,
@@ -1105,7 +1105,7 @@ sycl::event gemm_impl(sycl::queue &exec_q,
11051105
constexpr size_t m_groups = 2;
11061106
size_t delta_k(4);
11071107
size_t n_wi(64);
1108-
size_t delta_n(32);
1108+
size_t delta_n(16);
11091109

11101110
gemm_detail::scale_gemm_k_parameters<resTy, m_groups>(
11111111
local_mem_size, reserved_slm_size,
@@ -1236,7 +1236,7 @@ sycl::event gemm_contig_impl(sycl::queue &exec_q,
12361236
constexpr size_t m_groups = 1;
12371237
size_t delta_k(4);
12381238
size_t n_wi(64);
1239-
size_t delta_n(32);
1239+
size_t delta_n(16);
12401240

12411241
gemm_detail::scale_gemm_k_parameters<resTy, m_groups>(
12421242
local_mem_size, reserved_slm_size,
@@ -1273,7 +1273,7 @@ sycl::event gemm_contig_impl(sycl::queue &exec_q,
12731273
constexpr size_t m_groups = 2;
12741274
size_t delta_k(4);
12751275
size_t n_wi(64);
1276-
size_t delta_n(32);
1276+
size_t delta_n(16);
12771277

12781278
gemm_detail::scale_gemm_k_parameters<resTy, m_groups>(
12791279
local_mem_size, reserved_slm_size,
@@ -1968,7 +1968,7 @@ sycl::event gemm_tree_impl(sycl::queue &exec_q,
19681968
// temp memory if only one group is needed
19691969
size_t delta_k(4);
19701970
size_t n_wi(64);
1971-
size_t delta_n(32);
1971+
size_t delta_n(16);
19721972

19731973
using dpctl::tensor::type_utils::is_complex;
19741974
if constexpr (!is_complex<resTy>::value) {
@@ -3402,7 +3402,7 @@ sycl::event gemm_contig_tree_impl(sycl::queue &exec_q,
34023402
// temp memory if only one group is needed
34033403
size_t delta_k(4);
34043404
size_t n_wi(64);
3405-
size_t delta_n(32);
3405+
size_t delta_n(16);
34063406

34073407
using dpctl::tensor::type_utils::is_complex;
34083408
if constexpr (!is_complex<resTy>::value) {
@@ -5472,8 +5472,8 @@ sycl::event gemm_batch_impl(sycl::queue &exec_q,
54725472
if (m == 1) {
54735473
constexpr int m_groups = 1;
54745474
size_t delta_k(4);
5475-
size_t n_wi(64);
5476-
size_t delta_n(32);
5475+
size_t n_wi(32);
5476+
size_t delta_n(16);
54775477

54785478
gemm_detail::scale_gemm_k_parameters<resTy, m_groups>(
54795479
local_mem_size, reserved_slm_size,
@@ -5514,8 +5514,8 @@ sycl::event gemm_batch_impl(sycl::queue &exec_q,
55145514
else if (k > n && k > m) {
55155515
constexpr size_t m_groups = 2;
55165516
size_t delta_k(4);
5517-
size_t n_wi(64);
5518-
size_t delta_n(32);
5517+
size_t n_wi(32);
5518+
size_t delta_n(16);
55195519

55205520
gemm_detail::scale_gemm_k_parameters<resTy, m_groups>(
55215521
local_mem_size, reserved_slm_size,
@@ -5677,8 +5677,8 @@ sycl::event gemm_batch_contig_impl(sycl::queue &exec_q,
56775677
if (m == 1) {
56785678
constexpr int m_groups = 1;
56795679
size_t delta_k(4);
5680-
size_t n_wi(64);
5681-
size_t delta_n(32);
5680+
size_t n_wi(32);
5681+
size_t delta_n(16);
56825682

56835683
gemm_detail::scale_gemm_k_parameters<resTy, m_groups>(
56845684
local_mem_size, reserved_slm_size,
@@ -5719,8 +5719,8 @@ sycl::event gemm_batch_contig_impl(sycl::queue &exec_q,
57195719
else if (k > n && k > m) {
57205720
constexpr size_t m_groups = 2;
57215721
size_t delta_k(4);
5722-
size_t n_wi(64);
5723-
size_t delta_n(32);
5722+
size_t n_wi(32);
5723+
size_t delta_n(16);
57245724

57255725
gemm_detail::scale_gemm_k_parameters<resTy, m_groups>(
57265726
local_mem_size, reserved_slm_size,
@@ -6499,7 +6499,7 @@ gemm_batch_tree_impl(sycl::queue &exec_q,
64996499

65006500
if ((k > n && k > m) || m == 1) {
65016501
size_t delta_k(4);
6502-
size_t n_wi(64);
6502+
size_t n_wi(32);
65036503
size_t delta_n(4);
65046504

65056505
using dpctl::tensor::type_utils::is_complex;
@@ -8205,7 +8205,7 @@ gemm_batch_contig_tree_impl(sycl::queue &exec_q,
82058205

82068206
if ((k > n && k > m) || m == 1) {
82078207
size_t delta_k(4);
8208-
size_t n_wi(64);
8208+
size_t n_wi(32);
82098209
size_t delta_n(4);
82108210

82118211
using dpctl::tensor::type_utils::is_complex;

0 commit comments

Comments
 (0)