 import itertools
 import torch
 import intel_pytorch_extension as ipex
+import contextlib
+import io
 
 from common_ipex_conf import AutoMixPrecision, AutoDNNL
 
@@ -1303,6 +1305,33 @@ def test_unsqueeze(self):
         x_dpcpp = x.clone().to(device=device)
         self.assertEqual(x_dpcpp.unsqueeze(1), x.unsqueeze(1))
 
+        with AutoDNNL(True):
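+            # unsqueeze a CHW input to NCHW on both CPU and the DPCPP device and check conv2d parity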
+            x = torch.randn(3, 64, 64, dtype=torch.float32)
+            x_xpu = x.clone().to(device=device)
+            conv2d_cpu = torch.nn.Conv2d(3, 6, (3, 3))
+            conv2d_xpu = copy.deepcopy(conv2d_cpu).to(device=device)
+            x_nchw = x.unsqueeze(0)
+            x_xpu_nchw = x_xpu.unsqueeze(0)
+            self.assertEqual(conv2d_cpu(x_nchw), conv2d_xpu(x_xpu_nchw))
+
+            conv2d_cpu = torch.nn.Conv2d(3, 1, (3, 3))
+            conv2d_xpu = copy.deepcopy(conv2d_cpu).to(ipex.DEVICE)
+            # squeeze the conv2d weight to CHW
+            conv2d_weight_seq = conv2d_xpu.weight.clone().squeeze()
+            # unsqueeze the conv2d weight back to NCHW
+            conv2d_weight_unseq = torch.unsqueeze(conv2d_weight_seq, 0)
+
+            conv2d_xpu.weight.data = conv2d_weight_unseq
+
+            a = torch.randn(1, 3, 10, 10).to(ipex.DEVICE)
+            # Make sure conv2d_xpu.weight is in blocked format
+            conv2d_xpu(a)
+            # Make sure the unsqueeze does not trigger a reorder
+            conv2d_weight_unseq = torch.unsqueeze(conv2d_weight_seq, 0)
+            self.assertEqual(conv2d_xpu(a), conv2d_cpu(a.to("cpu")))
+
+
+
 class TestSoftMax(TestCase):
     def test_softmax(self):
         with AutoDNNL(True):
@@ -1580,7 +1609,7 @@ def _lstm_params_list(self, cell):
         if cell == "RNN":
             params_dict["nonlinearity"] = ["tanh"]  # ["tanh", "relu"] TODO relu has accuracy issue
         elif cell == "GRU":
-            params_dict["nonlinearity"] = [""]
+            params_dict["nonlinearity"] = [""]
 
         params_list = []
 
@@ -1592,16 +1621,16 @@ def _test_lstm(self, training):
         rand_seed = int(get_rand_seed())
         print("{} rand sed: {}".format(sys._getframe().f_code.co_name, rand_seed))
         torch.manual_seed(rand_seed)
-
+
         params_list = self._lstm_params_list("LSTM")
 
         for input_size, hidden_size, num_layers, bidirectional, bias, empty_state, batch_first, dropout, batch_size, seq_len in itertools.product(*params_list):
             # dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1
             if dropout > 0 and num_layers == 1:
                 continue
-
+
             num_directions = 2 if bidirectional else 1
-
+
             if batch_first:
                 input = torch.randn(batch_size, seq_len, input_size)
             else:
@@ -1649,7 +1678,7 @@ def _test_lstm(self, training):
                 hy_cpu[0].sum().backward(retain_graph=True)
                 hy_dpcpp[0].sum().backward(retain_graph=True)
                 self.assertEqual(h0_dpcpp.grad.to('cpu'), h_cpu.grad)
-
+
                 hy_cpu[1].sum().backward(retain_graph=True)
                 hy_dpcpp[1].sum().backward(retain_graph=True)
                 self.assertEqual(c0_dpcpp.grad.to('cpu'), c_cpu.grad)
@@ -1658,16 +1687,16 @@ def _test_rnn(self, cell, training):
         rand_seed = int(get_rand_seed())
         print("{} rand sed: {}".format(sys._getframe().f_code.co_name, rand_seed))
         torch.manual_seed(rand_seed)
-
+
         params_list = self._lstm_params_list(cell)
 
         for input_size, hidden_size, num_layers, bidirectional, bias, empty_state, batch_first, dropout, batch_size, seq_len, nonlinearity in itertools.product(*params_list):
             # dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1
             if dropout > 0 and num_layers == 1:
                 continue
-
+
             num_directions = 2 if bidirectional else 1
-
+
             if batch_first:
                 input = torch.randn(batch_size, seq_len, input_size)
             else:
@@ -1683,7 +1712,7 @@ def _test_rnn(self, cell, training):
                 model_cpu = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, bidirectional=bidirectional, bias=bias, dropout=dropout, batch_first=batch_first, nonlinearity=nonlinearity)
             elif cell == "GRU":
                 model_cpu = torch.nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, bidirectional=bidirectional, bias=bias, dropout=dropout, batch_first=batch_first)
-
+
             model_cpu.train() if training else model_cpu.eval()
 
             input_dpcpp = input.clone().to(device=device).requires_grad_(training)
@@ -1720,7 +1749,7 @@ def _test_pack_padded_sequence_lstm(self, training):
         rand_seed = int(get_rand_seed())
         print("{} rand sed: {}".format(sys._getframe().f_code.co_name, rand_seed))
         torch.manual_seed(rand_seed)
-
+
         embedding_dim = 1024
         hidden_dim = 10
         batch_size = 24
@@ -1755,7 +1784,7 @@ def _test_pack_padded_sequence_lstm(self, training):
 
         lstm_out, hidden_out = lstm(embeds, (hidden_0, hidden_1))
         lstm_out, _ = torch.nn.utils.rnn.pad_packed_sequence(lstm_out, batch_first=True)
-
+
         with AutoDNNL(True):
             lstm_out_dpcpp, hidden_out_dpcpp = lstm_dpcpp(embeds_dpcpp, (hidden_0_dpcpp, hidden_1_dpcpp))
             lstm_out_dpcpp, _ = torch.nn.utils.rnn.pad_packed_sequence(lstm_out_dpcpp, batch_first=True)
@@ -1770,16 +1799,16 @@ def _test_pack_padded_sequence_lstm(self, training):
             self.assertEqual(sentences_dpcpp.grad.to('cpu'), sentences.grad)
             self.assertEqual(lstm_dpcpp.weight_ih_l0.grad.to('cpu'), lstm.weight_ih_l0.grad)
             self.assertEqual(lstm_dpcpp.weight_hh_l0.grad.to('cpu'), lstm.weight_hh_l0.grad)
-
+
             self.assertEqual(lstm_dpcpp.bias_ih_l0.grad.to('cpu'), lstm.bias_ih_l0.grad)
             self.assertEqual(lstm_dpcpp.bias_hh_l0.grad.to('cpu'), lstm.bias_hh_l0.grad)
-
+
             self.assertEqual(hidden_0_dpcpp.grad.to('cpu'), hidden_0.grad)
             self.assertEqual(hidden_1_dpcpp.grad.to('cpu'), hidden_1.grad)
 
     def test_lstm_inference(self):
         self._test_lstm(training=False)
-
+
     def test_lstm_training(self):
         self._test_lstm(training=True)
 
@@ -1937,6 +1966,17 @@ def test_upsample_bilinear2d_scale_factor(self):
             y_dpcpp.sum().backward()
             self.assertEqual(x_cpu.grad, x_dpcpp.grad)
 
+        with AutoDNNL(True):
+            x = torch.randn(2, 2, 4, 4)
+            x_cpu = x.clone().requires_grad_()
+            x_dpcpp = x.clone().to(device=device).requires_grad_()
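+            # check bilinear upsampling with per-dimension scale factors matches CPU, forward and backward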
+            y_cpu = F.interpolate(x_cpu, scale_factor=[2, 3], mode='bilinear', align_corners=False, recompute_scale_factor=False)
+            y_dpcpp = F.interpolate(x_dpcpp, scale_factor=[2, 3], mode='bilinear', align_corners=False, recompute_scale_factor=False)
+            self.assertEqual(y_cpu, y_dpcpp)
+            y_cpu.sum().backward()
+            y_dpcpp.sum().backward()
+            self.assertEqual(x_cpu.grad, x_dpcpp.grad)
+
     def test_upsample_bilinear2d_size(self):
         rand_seed = int(get_rand_seed())
         print("{} rand sed: {}".format(sys._getframe().f_code.co_name, rand_seed))