Commit a986bd3
(1) remove any tag in inner_product (2) add a UT for linear reorder (#158)
* (1) remove any tag in inner_product (2) add a UT for linear reorder
* add inputs for UT test_linear_reorder
* add w/o bias in linear_reorder UT
* fix for format_list string
* format some changes
* commit format minor changes
1 parent 7b34979 commit a986bd3
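The core change drops `format_tag::any` (and the matching `reorder_if_differ_in` round-trips) from the inner-product primitives, so oneDNN consumes src/weights/dst in their existing plain layouts instead of staging them through a blocked one. The new UT verifies this by running a linear workload under `DNNL_VERBOSE=1` and counting reorder lines. A minimal sketch of the check the UT automates (not part of the commit; it drives the `linear_reorder.py` script added below):

```python
import os
import subprocess

# Run the linear workload with oneDNN verbose logging enabled.
env = dict(os.environ, DNNL_VERBOSE="1")
out = subprocess.run(["python", "-u", "linear_reorder.py"],
                     capture_output=True, text=True, env=env).stdout

# Every executed reorder primitive shows up as a verbose line of the form
# "dnnl_verbose,exec,cpu,reorder,...". After this commit, only the weight
# prepack reorders should remain.
reorders = [l for l in out.splitlines()
            if l.startswith("dnnl_verbose") and ",reorder," in l]
print(f"{len(reorders)} reorder(s) executed")
```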

File tree

5 files changed: +170 -60 lines changed

* ideep/ideep/operators/inner_product.hpp
* tests/cpu/common_utils.py
* tests/cpu/linear_reorder.py
* tests/cpu/test_linear_reorder.py
* torch_ipex/csrc/cpu/Linear.cpp

ideep/ideep/operators/inner_product.hpp

Lines changed: 30 additions & 52 deletions
```diff
@@ -164,7 +164,6 @@ struct inner_product_forward : public dnnl::inner_product_forward {
       }
     } else {
       op_attr = attr;
-      src_desc = {src.get_dims(), data_type::f32, format_tag::any};
       if (src.has_scale()) {
         auto src_scale = src.get_scale();
         src_scale[0] = 1.f / src_scale[0];
@@ -178,56 +177,50 @@ struct inner_product_forward : public dnnl::inner_product_forward {
       // align weights data type with src
       dst_data_type = src.get_data_type() == data_type::bf16 ? data_type::bf16
                                                              : data_type::f32;
-      src_desc = src.get_desc().to_type(dst_data_type).to_format_any();
-      weights_desc = weights.get_desc().to_type(dst_data_type).to_format_any();
+      src_desc = src.get_desc().to_type(dst_data_type);
+      weights_desc = weights.get_desc().to_type(dst_data_type);
       if (with_bias) {
         IDEEP_ENFORCE(utils::one_of(bias.get_data_type(),
                                     data_type::f32, data_type::bf16),
                       "Incorrect data type in bias");
-        bias_desc = bias.get_desc().to_format_any();
+        bias_desc = bias.get_desc();
       }
     }

-    tensor::desc dst_desc(dst_dims, dst_data_type, format_tag::any);
+    tensor::desc dst_desc = dst.get_desc().to_type(dst_data_type);
     auto pd = with_bias
         ? primitive_desc({aprop_kind, src_desc, weights_desc, bias_desc,
                           dst_desc}, op_attr, aengine)
        : primitive_desc({aprop_kind, src_desc, weights_desc, dst_desc},
                         op_attr, aengine);

-    auto expected_src = src.reorder_if_differ_in(pd.src_desc(), src_attr);
-    auto expected_weights = weights.reorder_if_differ_in(pd.weights_desc(), weights_attr);
     // [ Note output buffer ]
     // In this case, dst is an empty ideep tensor, can be re-init
     // If dst is not empty, ideep must write result to dst's memory and it is caller's duty to
     // make sure dst is big enough to hold the result
     if (dst.is_empty()) {
       dst.init(pd.dst_desc());
     }
-    auto expected_dst = dst.reorder_if_differ_in(pd.dst_desc());
-    if (!dst_scales.empty() && utils::one_of(dst.get_data_type(), data_type::u8, data_type::s8)) {
-      expected_dst.set_scale(dst_scales_in);
+
+    if (!dst_scales.empty() &&
+        utils::one_of(dst.get_data_type(), data_type::u8, data_type::s8)) {
+      dst.set_scale(dst_scales_in);
     }

     if (with_bias){
-      auto expected_bias = bias.reorder_if_differ_in(pd.bias_desc(), bias_attr);
-      super(pd).execute(stream::default_stream(),
-                        {{DNNL_ARG_SRC, expected_src},
-                         {DNNL_ARG_WEIGHTS, expected_weights},
-                         {DNNL_ARG_BIAS, expected_bias},
-                         {DNNL_ARG_DST, expected_dst}});
+      super(pd).execute(stream::default_stream(), {{DNNL_ARG_SRC, src},
+                                                   {DNNL_ARG_WEIGHTS, weights},
+                                                   {DNNL_ARG_BIAS, bias},
+                                                   {DNNL_ARG_DST, dst}});
     } else {
-      super(pd).execute(stream::default_stream(),
-                        {{DNNL_ARG_SRC, expected_src},
-                         {DNNL_ARG_WEIGHTS, expected_weights},
-                         {DNNL_ARG_DST, expected_dst}});
+      super(pd).execute(stream::default_stream(), {{DNNL_ARG_SRC, src},
+                                                   {DNNL_ARG_WEIGHTS, weights},
+                                                   {DNNL_ARG_DST, dst}});
     }

-    if (attr.non_negitive_output() && expected_dst.get_data_type() == data_type::s8) {
-      expected_dst.to_type(data_type::u8);
+    if (attr.non_negitive_output() && dst.get_data_type() == data_type::s8) {
+      dst.to_type(data_type::u8);
     }
-    // reorder back to dst's buffer if needed
-    expected_dst.reorder_to_if_differ_from(dst);
   }
 };

@@ -242,11 +235,6 @@ struct inner_product_backward_data : public dnnl::inner_product_backward_data {
       tensor& diff_src,
       const engine& aengine = engine::cpu_engine()) {
     auto weights_ = weights;
-    if (diff_dst.get_data_type() == data_type::bf16) {
-      weights_.init(weights.get_desc().to_type(data_type::bf16));
-      weights_.reorder_from(weights);
-    }
-
     // workaround: diff_src and weights from caffe2 may have different dims.
     // It would be better for caffe2 to do this reshape anyway.
     if (diff_src_dims.size() != weights.ndims()) {
@@ -255,10 +243,9 @@ struct inner_product_backward_data : public dnnl::inner_product_backward_data {
       weights_.reshape(new_dims);
     }

-    auto diff_dst_desc = diff_dst.get_desc().to_format_any();
-    auto weights_desc = weights_.get_desc().to_format_any();
-    auto diff_src_desc =
-        tensor::desc(diff_src_dims, diff_dst.get_data_type(), tag::any);
+    auto diff_dst_desc = diff_dst.get_desc();
+    auto weights_desc = weights_.get_desc();
+    auto diff_src_desc = diff_src.get_desc().to_type(diff_dst.get_data_type());

     auto forward_hints =
         inner_product_forward::primitive_desc(
@@ -268,8 +255,6 @@ struct inner_product_backward_data : public dnnl::inner_product_backward_data {
     auto pd = primitive_desc(
         {diff_src_desc, weights_desc, diff_dst_desc}, aengine, forward_hints);

-    auto expected_diff_dst = diff_dst.reorder_if_differ_in(pd.diff_dst_desc());
-    auto expected_weights = weights_.reorder_if_differ_in(pd.weights_desc());
     // diff_src's origin content are not used, so it can be re-init directly
     // It's caller's duty to make sure diff_src's buffer size is same with it actually needed
     // Here we dose not support to write to given strided buffer since we know the grad is always contiguous
@@ -280,8 +265,8 @@ struct inner_product_backward_data : public dnnl::inner_product_backward_data {
     }

     super(pd).execute(stream::default_stream(),
-                      {{DNNL_ARG_DIFF_DST, expected_diff_dst},
-                       {DNNL_ARG_WEIGHTS, expected_weights},
+                      {{DNNL_ARG_DIFF_DST, diff_dst},
+                       {DNNL_ARG_WEIGHTS, weights_},
                        {DNNL_ARG_DIFF_SRC, diff_src}});
   }
 };
@@ -319,18 +304,17 @@ struct inner_product_backward_weights
       tensor& diff_bias,
       const data_type diff_weight_type,
       const engine& aengine = engine::cpu_engine()) {
-    auto src_desc = src.get_desc().to_format_any();
-    auto diff_dst_desc = diff_dst.get_desc().to_format_any();
+    auto src_desc = src.get_desc();
+    auto diff_dst_desc = diff_dst.get_desc();
     auto diff_weights_dims = src.get_dims();
     diff_weights_dims[0] = diff_dst.get_dim(1);
     data_type diff_dst_type = diff_dst.get_data_type();
     data_type diff_weight_type_in = data_type::undef== diff_weight_type ?
                                     diff_dst_type : diff_weight_type;
-    auto diff_weights_desc =
-        tensor::desc(diff_weights_dims, diff_weight_type_in, tag::any);

-    auto diff_bias_desc =
-        tensor::desc({diff_dst.get_dim(1)}, diff_weight_type_in, tag::any);
+    auto diff_weights_desc =
+        diff_weights.get_desc().to_type(diff_weight_type_in);
+    auto diff_bias_desc = diff_bias.get_desc().to_type(diff_weight_type_in);

     // for forward hint, weights_desc should have same data_type
     // with other input desc, expect for bias_desc
@@ -349,18 +333,13 @@ struct inner_product_backward_weights
         : primitive_desc({src_desc, diff_weights_desc, diff_dst_desc},
                          aengine, forward_hints);

-    auto expected_diff_dst = diff_dst.reorder_if_differ_in(pd.diff_dst_desc());
-    auto expected_src = src.reorder_if_differ_in(pd.src_desc());
     if (diff_weights.is_empty()){
       diff_weights.init(pd.diff_weights_desc());
     }
-    // Here we need to write to given strided buffer, so if given buffer is different with the best format
-    // We need to firstly init a new buffer to store the output, and copy the output to a given buffer
-    tensor expected_diff_weights = diff_weights.get_desc() == pd.diff_weights_desc() ? diff_weights : tensor(pd.diff_weights_desc());

-    exec_args args {{DNNL_ARG_DIFF_DST, expected_diff_dst},
-                    {DNNL_ARG_SRC, expected_src},
-                    {DNNL_ARG_DIFF_WEIGHTS ,expected_diff_weights}};
+    exec_args args{{DNNL_ARG_DIFF_DST, diff_dst},
+                   {DNNL_ARG_SRC, src},
+                   {DNNL_ARG_DIFF_WEIGHTS, diff_weights}};

     if (with_diff_bias) {
       if (diff_bias.is_empty()){
@@ -373,7 +352,6 @@ struct inner_product_backward_weights
     }

     super(pd).execute(stream::default_stream(), args);
-    expected_diff_weights.reorder_to_if_differ_from(diff_weights);
   }
 };
```
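In short, the forward and both backward paths now build their primitive descriptors from the tensors' existing plain descriptors instead of `format_tag::any`, and the `expected_*` staging tensors with their `reorder_if_differ_in` / `reorder_to_if_differ_from` round-trips are dropped, so the primitives read and write the caller's buffers directly.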

tests/cpu/common_utils.py

Lines changed: 35 additions & 5 deletions
```diff
@@ -1065,29 +1065,59 @@ def get_reorder_info(self, line):
         assert self.is_dnnl_reorder(line)
         tokens = line.split(',')
         src_desc, dst_desc = tokens[6].split(' ')
-        src_dtype = src_desc.split('::')[0].split('-')
+        src_dtype = src_desc.split('::')[0].split('_')
         src_format = src_desc.split('::')[1]
-        dst_dtype = dst_desc.split('::')[0].split('-')
+        dst_dtype = dst_desc.split('::')[0].split('_')
         dst_format = dst_desc.split('::')[1]
         return src_dtype, src_format, dst_dtype, dst_format

+    def isPlainFormat(self, check_format):
+        format_index = 0
+        format = ""
+        for check in check_format.split(':'):
+            if check == "blocked":
+                break
+            format_index = format_index+1
+        format = check_format.split(':')[format_index+1]
+        # ref to https://spec.oneapi.io/versions/latest/elements/oneDNN/source/data_model/memory/formats.html#
+        format_list=["a",
+                     "ab","ba",
+                     "acb","abc","bac","cba","bca",
+                     "abcd","abdc","acdb","bacd","bcda","cdba","dcab",
+                     "abcde","abdec","acbde","acdeb","bacde","bcdea","cdeba","decab",
+                     "abcdef","acbdef","defcab"]
+        for f in format_list:
+            if f == format:
+                return True
+        return False
+
+    def RedundantReorder(self, line):
+        if not self.is_dnnl_reorder(line):
+            return False
+        src_dtype, src_format, dst_dtype, dst_format = self.get_reorder_info(line)
+        return src_dtype[1] == dst_dtype[1] and src_format == dst_format
+
     def ReorderForPack(self, line):
         if not self.is_dnnl_reorder(line):
             return False
         src_dtype, src_format, dst_dtype, dst_format = self.get_reorder_info(line)
-        return src_dtype == dst_dtype
+        if self.isPlainFormat(src_format) and self.isPlainFormat(dst_format): # for prepack, at least dst should be blocked format and not in the format list
+            return False
+        return src_dtype[1] == dst_dtype[1]

     def OnlyReorderDtype(self, line):
         if not self.is_dnnl_reorder(line):
             return False
         src_dtype, src_format, dst_dtype, dst_format = self.get_reorder_info(line)
-        return src_dtype != dst_dtype and src_format == dst_dtype
+        return src_dtype[1] != dst_dtype[1] and src_format == dst_format

     def OnlyReorderFormat(self, line):
         if not self.is_dnnl_reorder(line):
             return False
         src_dtype, src_format, dst_dtype, dst_format = self.get_reorder_info(line)
-        return src_dtype == dst_dtype and src_format != dst_dtype
+        if self.isPlainFormat(src_format) and not self.isPlainFormat(dst_format): # reorder from plain format to blocked, should be prepack reorder
+            return False
+        return src_dtype[1] == dst_dtype[1] and src_format != dst_format

     def assertOnlyReorderDtype(self, line):
         assert OnlyReorderDtype(line), 'the verbose msg shows not only reorder dtype'
```
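For reference, a hand-written example of the oneDNN verbose format these helpers parse (illustrative only, not captured from a real run):

```python
# Field 6 holds "src_<dtype>::<format> dst_<dtype>::<format>".
line = ("dnnl_verbose,exec,cpu,reorder,jit:uni,undef,"
        "src_f32::blocked:ab:f0 dst_f32::blocked:AB16b64a:f0,,,128x479,0.05")

tokens = line.split(',')
src_desc, dst_desc = tokens[6].split(' ')
src_dtype = src_desc.split('::')[0].split('_')   # ['src', 'f32']
src_format = src_desc.split('::')[1]             # 'blocked:ab:f0'
dst_dtype = dst_desc.split('::')[0].split('_')   # ['dst', 'f32']
dst_format = dst_desc.split('::')[1]             # 'blocked:AB16b64a:f0'

# 'ab' is in the plain-format list while 'AB16b64a' is not, so isPlainFormat
# is True for src and False for dst: same dtype, plain -> blocked, which
# ReorderForPack classifies as a prepack reorder.
```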

tests/cpu/linear_reorder.py

Lines changed: 48 additions & 0 deletions
```diff
@@ -0,0 +1,48 @@
+import torch
+import intel_pytorch_extension as ipex
+import torch.nn as nn
+import itertools
+
+class Model(nn.Module):
+    def __init__(self, ic, oc, bias):
+        super(Model, self).__init__()
+        self.linear = nn.Linear(ic, oc, bias=bias)
+
+    def forward(self, input):
+        return self.linear(input)
+
+def run_model(dtype=None):
+    out_feature = [1024, 256, 1, torch.randint(3, 10, (1, )).item()]
+    in_feature = [128, 479, torch.randint(3, 10, (1, )).item()]
+    input_shapes=[]
+    for s in in_feature:
+        input_shapes += [(128, s), (2, 64, s), (2, 2, 32, s)]
+    options = itertools.product(out_feature, [True, False], input_shapes)
+    for out_features, bias, x_shape in options:
+        in_features = x_shape[-1]
+        x = torch.randn(x_shape, dtype=torch.float32).requires_grad_()
+        model = Model(in_features, out_features, bias)
+        optimizer = torch.optim.Adagrad(model.parameters(), lr=0.1)
+        if dtype == 0 :
+            conf = ipex.AmpConf(torch.float32)
+            model, optimizer = ipex.optimize(model, dtype=torch.float32, optimizer=optimizer, level='O1')
+            with ipex.amp.autocast(enabled=True, configure=conf):
+                run_mod = model.forward(x).sum()
+        elif dtype == 1 :
+            conf = ipex.AmpConf(torch.bfloat16)
+            model, optimizer = ipex.optimize(model, dtype=torch.bfloat16, optimizer=optimizer, level='O1')
+            with ipex.amp.autocast(enabled=True, configure=conf):
+                run_mod = model.forward(x).sum()
+        else: # reserved
+            pass
+        optimizer.zero_grad()
+        run_mod.backward()
+        optimizer.step()
+
+
+if __name__ == "__main__":
+    print(f"fp32, {'*' * 50}")
+    run_model(0)
+
+    print(f"bf16, {'*' * 50}")
+    run_model(1)
```

tests/cpu/test_linear_reorder.py

Lines changed: 36 additions & 0 deletions
```diff
@@ -0,0 +1,36 @@
+import unittest
+from common_utils import VerboseTestCase
+import subprocess
+class TestLinearReorder(VerboseTestCase):
+    def test_linear_reorder(self):
+        with subprocess.Popen('DNNL_VERBOSE=1 python -u linear_reorder.py', shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as p:
+            segmentation = {
+                'fp32': {'reorder_for_pack': 2, 'reorder_for_dtype': 0, 'reorder_for_format': 0, 'redundent_reorder' : 0,},
+                'bf16': {'reorder_for_pack': 3, 'reorder_for_dtype': 0, 'reorder_for_format': 0, 'redundent_reorder' : 0,},
+            } # there should be only reorders on prepack, if any other reorder appears, will cause fail
+            seg = None
+            for line in p.stdout.readlines():
+                line = str(line, 'utf-8').strip()
+                if line.endswith('***************'):
+                    seg = line.strip().split(',')[0]
+                    continue
+                # Following is to check if there is the reorder number is as excepted
+                if self.is_dnnl_verbose(line) and self.ReorderForPack(line):
+                    segmentation[seg]['reorder_for_pack'] -= 1
+                    self.assertTrue(segmentation[seg]['reorder_for_pack'] >=0, "show unexpected reorder for pack")
+
+                if self.is_dnnl_verbose(line) and self.OnlyReorderDtype(line):
+                    segmentation[seg]['reorder_for_dtype'] -= 1
+                    self.assertTrue(segmentation[seg]['reorder_for_dtype'] >=0, "show unexpected reorder for dtype")
+
+                if self.is_dnnl_verbose(line) and self.OnlyReorderFormat(line):
+                    segmentation[seg]['reorder_for_format'] -= 1
+                    self.assertTrue(segmentation[seg]['reorder_for_format'] >=0, "show unexpected reorder for format")
+
+                if self.is_dnnl_verbose(line) and self.RedundantReorder(line):
+                    segmentation[seg]['redundent_reorder'] -= 1
+                    self.assertTrue(segmentation[seg]['redundent_reorder'] >=0, "show unexpected redundent reorder")
+
+
+if __name__ == '__main__':
+    test = unittest.main()
```
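A brief usage note (not part of the commit): the test launches `linear_reorder.py` through a shell command with `DNNL_VERBOSE=1` set, and the script path is resolved relative to the working directory, so it should be run from `tests/cpu`, e.g. `python test_linear_reorder.py`.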

torch_ipex/csrc/cpu/Linear.cpp

Lines changed: 21 additions & 3 deletions
```diff
@@ -155,8 +155,23 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor> linear_backward_impl(
   at::Tensor grad_input, grad_weight, grad_bias;
   // weight's desc is needed for both bw_d and bw_w
   const ideep::tensor w = get_linear_prepacked_weight(weight, out_features, in_features);
-  auto input_reshaped = input.dim() > 2 ? input.reshape({-1, input.size(input.dim() - 1)}) : input;
-  auto grad_output_reshaped = grad_output.dim() > 2 ? grad_output.reshape({-1, grad_output.size(grad_output.dim() - 1)}) : grad_output;
+  // for IP, currently both stag=ab and dtag=ab are only supported by onednn, we
+  // need first make both src and diff_dst contiguous if the input or
+  // grad_output is not expected
+  auto input_contiguous = input.is_contiguous() ? input : input.contiguous();
+  auto input_reshaped =
+      input_contiguous.dim() > 2
+          ? input_contiguous.reshape(
+                {-1, input_contiguous.size(input_contiguous.dim() - 1)})
+          : input_contiguous;
+  auto grad_output_contiguous =
+      grad_output.is_contiguous() ? grad_output : grad_output.contiguous();
+  auto grad_output_reshaped =
+      grad_output_contiguous.dim() > 2
+          ? grad_output_contiguous.reshape(
+                {-1,
+                 grad_output_contiguous.size(grad_output_contiguous.dim() - 1)})
+          : grad_output_contiguous;
   const ideep::tensor grady = itensor_view_from_dense(grad_output_reshaped);
   if (output_mask[0]) {
     at::Tensor grad_input_reshaped = at::empty_like(input_reshaped);
@@ -166,7 +181,10 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor> linear_backward_impl(
     ideep::inner_product_backward_data::compute(
       grady, w, input_reshaped.sizes().vec(), gradx
     );
-    grad_input = input.dim() > 2 ? grad_input_reshaped.reshape(input.sizes().vec()) : grad_input_reshaped;
+    grad_input =
+        input_contiguous.dim() > 2
+            ? grad_input_reshaped.reshape(input_contiguous.sizes().vec())
+            : grad_input_reshaped;
   }
   if (output_mask[1] || output_mask[2]) {
     //bw_w
```
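The contiguity guard above matters because `itensor_view_from_dense` wraps the tensor's existing memory rather than copying it, and a transposed or otherwise strided tensor is not in the plain `ab` layout the inner-product primitive now expects. A standalone PyTorch illustration of the condition being handled (plain PyTorch semantics, independent of this codebase):

```python
import torch

x = torch.randn(128, 479)
y = x.t()                  # a transposed view: same storage, swapped strides
print(y.is_contiguous())   # False - not a dense row-major ("ab") layout
z = y.contiguous()         # materializes a dense row-major copy
print(z.is_contiguous())   # True - safe to hand to oneDNN as a plain tensor
```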
