namespace torch_ipex {

void AtenIpexTypeExt::packed_add_(at::Tensor & top_half, at::Tensor & bot_half, const at::Tensor & grad, float alpha) {
-  TORCH_INTERNAL_ASSERT(grad.scalar_type() == at::ScalarType::BFloat16);
-  TORCH_INTERNAL_ASSERT(top_half.scalar_type() == at::ScalarType::BFloat16);
-  TORCH_INTERNAL_ASSERT(bot_half.scalar_type() == at::ScalarType::BFloat16);
-  TORCH_INTERNAL_ASSERT(grad.device().type() == at::DeviceType::DPCPP);
-  TORCH_INTERNAL_ASSERT(top_half.device().type() == at::DeviceType::DPCPP);
-  TORCH_INTERNAL_ASSERT(bot_half.device().type() == at::DeviceType::DPCPP);
-  TORCH_INTERNAL_ASSERT(top_half.sizes() == bot_half.sizes());
-  TORCH_INTERNAL_ASSERT(top_half.is_contiguous());
-  TORCH_INTERNAL_ASSERT(bot_half.is_contiguous());
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad.scalar_type() == at::ScalarType::BFloat16);
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(top_half.scalar_type() == at::ScalarType::BFloat16);
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(bot_half.scalar_type() == at::ScalarType::BFloat16);
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad.device().type() == at::DeviceType::DPCPP);
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(top_half.device().type() == at::DeviceType::DPCPP);
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(bot_half.device().type() == at::DeviceType::DPCPP);
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(top_half.sizes() == bot_half.sizes());
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(top_half.is_contiguous());
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(bot_half.is_contiguous());

  RECORD_FUNCTION("packed_add_", std::vector<c10::IValue>({top_half, bot_half, grad, alpha}), torch::autograd::Node::peek_at_next_sequence_nr());
  if (grad.is_sparse()) {
-    TORCH_INTERNAL_ASSERT(top_half.dim() == 2);
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(top_half.dim() == 2);
    auto sparse_nnz = grad._nnz();
    auto sparse_dim = grad.sparse_dim();
    auto values = grad._values();
@@ -34,14 +34,14 @@ void AtenIpexTypeExt::packed_add_(at::Tensor & top_half, at::Tensor & bot_half,
    auto feature_size = values.stride(0);
    auto indices_accessor = indices.accessor<int64_t, 2>();

-    TORCH_INTERNAL_ASSERT(values.is_contiguous());
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(values.is_contiguous());
    auto value_ptr = values.data_ptr<at::BFloat16>();
    auto top_half_ptr = top_half.data_ptr<at::BFloat16>();
    auto bot_half_ptr = bot_half.data_ptr<at::BFloat16>();

-    TORCH_INTERNAL_ASSERT(value_ptr != nullptr);
-    TORCH_INTERNAL_ASSERT(top_half_ptr != nullptr);
-    TORCH_INTERNAL_ASSERT(bot_half_ptr != nullptr);
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(value_ptr != nullptr);
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(top_half_ptr != nullptr);
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(bot_half_ptr != nullptr);

    std::vector<int64_t> sparse_stride(sparse_dim);
    for (int64_t d = 0; d < sparse_dim; d++) {
@@ -80,7 +80,7 @@ void AtenIpexTypeExt::packed_add_(at::Tensor & top_half, at::Tensor & bot_half,
      }
    });
  } else {
-    TORCH_INTERNAL_ASSERT(grad.is_contiguous());
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad.is_contiguous());
    // TODO: vector implementation basing on vector size
    union packed_bf16 {
      unsigned short s[2];
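The union packed_bf16 at the end of the hunk above is the core of the dense path: top_half holds the high 16 bits of each fp32 master weight (which is exactly its BFloat16 representation used for compute) and bot_half holds the low 16 bits, so packed_add_ can rebuild the fp32 value, apply w += alpha * grad, and split the result back. A minimal standalone sketch of that bit-packing idea (plain C++ with hypothetical helper names, not the IPEX kernel):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Illustrative split-BF16 storage: an fp32 master weight is kept as two
// 16-bit halves; the top half is the BFloat16 weight, the bottom half keeps
// the remaining mantissa bits so the update retains full fp32 precision.
static float unpack_fp32(uint16_t top, uint16_t bot) {
  uint32_t bits = (static_cast<uint32_t>(top) << 16) | bot;
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

static void pack_fp32(float f, uint16_t& top, uint16_t& bot) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  top = static_cast<uint16_t>(bits >> 16);
  bot = static_cast<uint16_t>(bits & 0xFFFFu);
}

int main() {
  uint16_t top = 0x3F80, bot = 0x0000;  // fp32 1.0f split into two halves
  float grad = 0.5f, alpha = -0.1f;
  float w = unpack_fp32(top, bot);      // rebuild the fp32 master weight
  w += grad * alpha;                    // packed_add_-style update: w += alpha * grad
  pack_fp32(w, top, bot);               // split the result back into the halves
  std::printf("updated weight = %f\n", unpack_fp32(top, bot));
  return 0;
}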
@@ -201,15 +201,15 @@ inline at::Tensor _interaction_forward(const std::vector<at::Tensor> & input) {
  std::vector<uint32_t> feature_sizes(input.size());
  std::vector<T *> input_data(input.size());
  for (int i = 0; i < input.size(); i++) {
-    TORCH_INTERNAL_ASSERT(input[i].is_contiguous());
-    TORCH_INTERNAL_ASSERT(input[i].device().is_dpcpp());
-    TORCH_INTERNAL_ASSERT(input[i].dim() == 2);
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(input[i].is_contiguous());
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(input[i].device().is_dpcpp());
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(input[i].dim() == 2);
    feature_sizes[i] = input[i].sizes()[1];
    total_feature_size += input[i].sizes()[1];
    input_data[i] = input[i].data_ptr<T>();
  }
  auto vector_nums = total_feature_size / vector_size;
-  TORCH_INTERNAL_ASSERT(total_feature_size % vector_size == 0);
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(total_feature_size % vector_size == 0);
  auto interact_feature_size = vector_nums * (vector_nums - 1) / 2;
  auto tr_vector_size = sizeof(T) == 4 ? vector_size : vector_size / 2;
  auto out = at::empty({batch_size, interact_feature_size + vector_size}, input[0].options());
@@ -239,7 +239,7 @@ inline at::Tensor _interaction_forward(const std::vector<at::Tensor> & input) {

template <typename T>
inline std::vector<at::Tensor> _interaction_backward(const at::Tensor & grad_out, const std::vector<at::Tensor> & input) {
-  TORCH_INTERNAL_ASSERT(grad_out.is_contiguous());
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad_out.is_contiguous());
  RECORD_FUNCTION("_interaction_backward", std::vector<c10::IValue>({grad_out, input}), torch::autograd::Node::peek_at_next_sequence_nr());
  uint32_t total_feature_size = 0;
  int64_t batch_size = input[0].sizes()[0];
@@ -257,7 +257,7 @@ inline std::vector<at::Tensor> _interaction_backward(const at::Tensor & grad_out
    output_data[i] = output[i].data_ptr<T>();
  }
  auto vector_nums = total_feature_size / vector_size;
-  TORCH_INTERNAL_ASSERT(total_feature_size % vector_size == 0);
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(total_feature_size % vector_size == 0);
  auto interact_feature_size = vector_nums * (vector_nums - 1) / 2;
  auto grad_out_data = grad_out.data_ptr<T>();

@@ -305,11 +305,11 @@ inline std::vector<at::Tensor> _interaction_backward(const at::Tensor & grad_out

at::Tensor AtenIpexTypeExt::interaction_forward(const std::vector<at::Tensor> & input) {
  if (input[0].scalar_type() == at::kFloat) {
-    for (const auto &in : input) { TORCH_INTERNAL_ASSERT(in.scalar_type() == at::kFloat); }
+    for (const auto &in : input) { TORCH_INTERNAL_ASSERT_DEBUG_ONLY(in.scalar_type() == at::kFloat); }
    return _interaction_forward<float>(input);
  } else {
-    TORCH_INTERNAL_ASSERT(input[0].scalar_type() == at::kBFloat16);
-    for (const auto &in : input) { TORCH_INTERNAL_ASSERT(in.scalar_type() == at::kBFloat16); }
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(input[0].scalar_type() == at::kBFloat16);
+    for (const auto &in : input) { TORCH_INTERNAL_ASSERT_DEBUG_ONLY(in.scalar_type() == at::kBFloat16); }
    return _interaction_forward<at::BFloat16>(input);
  }
}
@@ -318,18 +318,18 @@ std::vector<at::Tensor> AtenIpexTypeExt::interaction_backward(const at::Tensor &
  if (grad_out.scalar_type() == at::kFloat) {
    return _interaction_backward<float>(grad_out, input);
  } else {
-    TORCH_INTERNAL_ASSERT(grad_out.scalar_type() == at::kBFloat16);
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad_out.scalar_type() == at::kBFloat16);
    return _interaction_backward<at::BFloat16>(grad_out, input);
  }
}

template <typename T>
static inline at::Tensor _embedding_bag_forward(const at::Tensor &weights, const at::Tensor &inputs, const at::Tensor &offsets) {
-  TORCH_INTERNAL_ASSERT(weights.is_contiguous());
-  TORCH_INTERNAL_ASSERT(inputs.is_contiguous());
-  TORCH_INTERNAL_ASSERT(offsets.is_contiguous());
-  TORCH_INTERNAL_ASSERT(inputs.dim() == 1);
-  TORCH_INTERNAL_ASSERT(weights.dim() == 2);
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(weights.is_contiguous());
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(inputs.is_contiguous());
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(offsets.is_contiguous());
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(inputs.dim() == 1);
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(weights.dim() == 2);
  RECORD_FUNCTION("_embedding_bag_forward", std::vector<c10::IValue>({weights, inputs, offsets}), torch::autograd::Node::peek_at_next_sequence_nr());
  auto batch_size = offsets.size(0);
  auto num_input = inputs.size(0);
@@ -345,7 +345,7 @@ static inline at::Tensor _embedding_bag_forward(const at::Tensor &weights, const
    auto inputs_start = offsets_data[i];
    auto inputs_end = (i < batch_size - 1) ? offsets_data[i + 1] : num_input;
    // TODO: add acc_t support for bag size larger than 1
-    TORCH_INTERNAL_ASSERT(inputs_end - inputs_start == 1);
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(inputs_end - inputs_start == 1);
    auto out_data_ptr = &output_data[i * vector_size];
    #pragma omp simd
    for (int64_t v = 0; v < vector_size; v++) out_data_ptr[v] = 0.0;
@@ -361,8 +361,8 @@ static inline at::Tensor _embedding_bag_forward(const at::Tensor &weights, const
template <typename T>
static inline at::Tensor _embedding_bag_backward(const at::Tensor &grad_out,
    const at::Tensor &weights, const at::Tensor &inputs, const at::Tensor offsets) {
-  TORCH_INTERNAL_ASSERT(inputs.dim() == 1);
-  TORCH_INTERNAL_ASSERT(grad_out.dim() == 2);
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(inputs.dim() == 1);
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad_out.dim() == 2);
  RECORD_FUNCTION("_embedding_bag_backward", std::vector<c10::IValue>({grad_out, weights, inputs, offsets}), torch::autograd::Node::peek_at_next_sequence_nr());
  auto batch_size = offsets.size(0);
  auto num_input = inputs.size(0);
@@ -408,7 +408,7 @@ at::Tensor AtenIpexTypeExt::embedding_bag_forward(const at::Tensor &weights, con
  if (weights.scalar_type() == at::kFloat) {
    return _embedding_bag_forward<float>(weights, inputs, offsets);
  } else {
-    TORCH_INTERNAL_ASSERT(weights.scalar_type() == at::kBFloat16);
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(weights.scalar_type() == at::kBFloat16);
    return _embedding_bag_forward<at::BFloat16>(weights, inputs, offsets);
  }
}
@@ -418,7 +418,7 @@ at::Tensor AtenIpexTypeExt::embedding_bag_backward(const at::Tensor &grad_out,
  if (grad_out.scalar_type() == at::kFloat) {
    return _embedding_bag_backward<float>(grad_out, weights, inputs, offsets);
  } else {
-    TORCH_INTERNAL_ASSERT(grad_out.scalar_type() == at::kBFloat16);
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(grad_out.scalar_type() == at::kBFloat16);
    return _embedding_bag_backward<at::BFloat16>(grad_out, weights, inputs, offsets);
  }
}
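Every change in this diff swaps TORCH_INTERNAL_ASSERT for TORCH_INTERNAL_ASSERT_DEBUG_ONLY, whose check is compiled out of release (NDEBUG) builds, so these hot operator paths stop paying for the sanity checks in production while debug builds keep them. A rough standalone analogue of that pattern (plain C++ macros written for illustration, not the actual c10 definitions):

#include <cstdio>
#include <cstdlib>

// ALWAYS_ASSERT fires in every build; DEBUG_ONLY_ASSERT compiles to a no-op
// when NDEBUG is defined, so release builds skip the condition entirely.
#define ALWAYS_ASSERT(cond)                               \
  do {                                                    \
    if (!(cond)) {                                        \
      std::fprintf(stderr, "assert failed: %s\n", #cond); \
      std::abort();                                       \
    }                                                     \
  } while (0)

#ifdef NDEBUG
#define DEBUG_ONLY_ASSERT(cond) ((void)0)
#else
#define DEBUG_ONLY_ASSERT(cond) ALWAYS_ASSERT(cond)
#endif

int main() {
  int total_feature_size = 128, vector_size = 32;
  // Checked only in debug builds, mirroring the _DEBUG_ONLY variant above.
  DEBUG_ONLY_ASSERT(total_feature_size % vector_size == 0);
  std::printf("vector_nums = %d\n", total_feature_size / vector_size);
  return 0;
}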