@@ -1649,26 +1649,26 @@ modules we need.
   0%|          | 0/10000 [00:00<?, ?it/s]
-  8%|8         | 800/10000 [00:00<00:08, 1095.64it/s]
- 16%|#6        | 1600/10000 [00:05<00:33, 253.72it/s]
- 24%|##4       | 2400/10000 [00:06<00:19, 380.48it/s]
- 32%|###2      | 3200/10000 [00:07<00:13, 496.91it/s]
- 40%|####      | 4000/10000 [00:08<00:10, 597.54it/s]
- 48%|####8     | 4800/10000 [00:09<00:07, 681.11it/s]
- 56%|#####6    | 5600/10000 [00:09<00:05, 748.83it/s]
- reward: -2.48 (r0 = -0.90), reward eval: reward: -0.00, reward normalized=-1.56/6.87, grad norm= 191.04, loss_value= 506.83, loss_actor= 11.94, target value: -8.02:  56%|#####6    | 5600/10000 [00:11<00:05, 748.83it/s]
- reward: -2.48 (r0 = -0.90), reward eval: reward: -0.00, reward normalized=-1.56/6.87, grad norm= 191.04, loss_value= 506.83, loss_actor= 11.94, target value: -8.02:  64%|######4   | 6400/10000 [00:12<00:07, 464.23it/s]
- reward: -0.12 (r0 = -0.90), reward eval: reward: -0.00, reward normalized=-1.67/5.78, grad norm= 57.83, loss_value= 305.88, loss_actor= 11.90, target value: -11.10:  64%|######4   | 6400/10000 [00:14<00:07, 464.23it/s]
- reward: -0.12 (r0 = -0.90), reward eval: reward: -0.00, reward normalized=-1.67/5.78, grad norm= 57.83, loss_value= 305.88, loss_actor= 11.90, target value: -11.10:  72%|#######2  | 7200/10000 [00:16<00:08, 343.38it/s]
- reward: -3.24 (r0 = -0.90), reward eval: reward: -0.00, reward normalized=-2.27/5.04, grad norm= 112.57, loss_value= 189.73, loss_actor= 13.83, target value: -14.60:  72%|#######2  | 7200/10000 [00:18<00:08, 343.38it/s]
- reward: -3.24 (r0 = -0.90), reward eval: reward: -0.00, reward normalized=-2.27/5.04, grad norm= 112.57, loss_value= 189.73, loss_actor= 13.83, target value: -14.60:  80%|########  | 8000/10000 [00:20<00:06, 291.23it/s]
- reward: -4.77 (r0 = -0.90), reward eval: reward: -0.00, reward normalized=-2.34/4.67, grad norm= 146.11, loss_value= 170.75, loss_actor= 15.86, target value: -15.01:  80%|########  | 8000/10000 [00:22<00:06, 291.23it/s]
- reward: -4.77 (r0 = -0.90), reward eval: reward: -0.00, reward normalized=-2.34/4.67, grad norm= 146.11, loss_value= 170.75, loss_actor= 15.86, target value: -15.01:  88%|########8 | 8800/10000 [00:23<00:04, 264.40it/s]
- reward: -4.44 (r0 = -0.90), reward eval: reward: -20.30, reward normalized=-2.15/5.06, grad norm= 50.42, loss_value= 210.07, loss_actor= 12.82, target value: -15.89:  88%|########8 | 8800/10000 [00:29<00:04, 264.40it/s]
- reward: -4.44 (r0 = -0.90), reward eval: reward: -20.30, reward normalized=-2.15/5.06, grad norm= 50.42, loss_value= 210.07, loss_actor= 12.82, target value: -15.89:  96%|#########6| 9600/10000 [00:31<00:02, 180.97it/s]
- reward: -4.88 (r0 = -0.90), reward eval: reward: -20.30, reward normalized=-3.54/4.43, grad norm= 132.01, loss_value= 193.44, loss_actor= 18.56, target value: -24.43:  96%|#########6| 9600/10000 [00:33<00:02, 180.97it/s]
- reward: -4.88 (r0 = -0.90), reward eval: reward: -20.30, reward normalized=-3.54/4.43, grad norm= 132.01, loss_value= 193.44, loss_actor= 18.56, target value: -24.43: : 10400it [00:35, 190.39it/s]
- reward: -16.37 (r0 = -0.90), reward eval: reward: -20.30, reward normalized=-3.37/5.43, grad norm= 111.49, loss_value= 240.03, loss_actor= 22.51, target value: -23.29: : 10400it [00:37, 190.39it/s]
+  8%|8         | 800/10000 [00:00<00:08, 1068.13it/s]
+ 16%|#6        | 1600/10000 [00:05<00:33, 251.38it/s]
+ 24%|##4       | 2400/10000 [00:06<00:20, 376.45it/s]
+ 32%|###2      | 3200/10000 [00:07<00:13, 491.36it/s]
+ 40%|####      | 4000/10000 [00:08<00:10, 590.41it/s]
+ 48%|####8     | 4800/10000 [00:09<00:07, 672.84it/s]
+ 56%|#####6    | 5600/10000 [00:09<00:05, 738.13it/s]
+ reward: -2.61 (r0 = -1.78), reward eval: reward: -0.00, reward normalized=-2.47/6.09, grad norm= 110.28, loss_value= 330.30, loss_actor= 13.74, target value: -14.68:  56%|#####6    | 5600/10000 [00:12<00:05, 738.13it/s]
+ reward: -2.61 (r0 = -1.78), reward eval: reward: -0.00, reward normalized=-2.47/6.09, grad norm= 110.28, loss_value= 330.30, loss_actor= 13.74, target value: -14.68:  64%|######4   | 6400/10000 [00:13<00:07, 456.92it/s]
+ reward: -0.12 (r0 = -1.78), reward eval: reward: -0.00, reward normalized=-2.22/5.65, grad norm= 57.01, loss_value= 298.59, loss_actor= 14.41, target value: -14.19:  64%|######4   | 6400/10000 [00:15<00:07, 456.92it/s]
+ reward: -0.12 (r0 = -1.78), reward eval: reward: -0.00, reward normalized=-2.22/5.65, grad norm= 57.01, loss_value= 298.59, loss_actor= 14.41, target value: -14.19:  72%|#######2  | 7200/10000 [00:16<00:07, 364.37it/s]
+ reward: -1.49 (r0 = -1.78), reward eval: reward: -0.00, reward normalized=-2.60/5.18, grad norm= 167.05, loss_value= 258.41, loss_actor= 13.21, target value: -16.61:  72%|#######2  | 7200/10000 [00:18<00:07, 364.37it/s]
+ reward: -1.49 (r0 = -1.78), reward eval: reward: -0.00, reward normalized=-2.60/5.18, grad norm= 167.05, loss_value= 258.41, loss_actor= 13.21, target value: -16.61:  80%|########  | 8000/10000 [00:20<00:06, 300.22it/s]
+ reward: -4.79 (r0 = -1.78), reward eval: reward: -0.00, reward normalized=-2.38/4.85, grad norm= 79.00, loss_value= 206.23, loss_actor= 19.63, target value: -14.99:  80%|########  | 8000/10000 [00:22<00:06, 300.22it/s]
+ reward: -4.79 (r0 = -1.78), reward eval: reward: -0.00, reward normalized=-2.38/4.85, grad norm= 79.00, loss_value= 206.23, loss_actor= 19.63, target value: -14.99:  88%|########8 | 8800/10000 [00:23<00:04, 268.54it/s]
+ reward: -5.15 (r0 = -1.78), reward eval: reward: -1.97, reward normalized=-2.48/5.30, grad norm= 105.76, loss_value= 197.28, loss_actor= 12.32, target value: -17.70:  88%|########8 | 8800/10000 [00:29<00:04, 268.54it/s]
+ reward: -5.15 (r0 = -1.78), reward eval: reward: -1.97, reward normalized=-2.48/5.30, grad norm= 105.76, loss_value= 197.28, loss_actor= 12.32, target value: -17.70:  96%|#########6| 9600/10000 [00:31<00:02, 177.73it/s]
+ reward: -4.73 (r0 = -1.78), reward eval: reward: -1.97, reward normalized=-2.81/4.37, grad norm= 67.84, loss_value= 147.29, loss_actor= 10.93, target value: -19.58:  96%|#########6| 9600/10000 [00:33<00:02, 177.73it/s]
+ reward: -4.73 (r0 = -1.78), reward eval: reward: -1.97, reward normalized=-2.81/4.37, grad norm= 67.84, loss_value= 147.29, loss_actor= 10.93, target value: -19.58: : 10400it [00:35, 186.18it/s]
+ reward: -1.02 (r0 = -1.78), reward eval: reward: -1.97, reward normalized=-2.69/4.96, grad norm= 80.73, loss_value= 193.05, loss_actor= 12.62, target value: -19.78: : 10400it [00:37, 186.18it/s]
@@ -1738,7 +1738,7 @@ To iterate further on this loss module we might consider:
.. rst-class:: sphx-glr-timing
- **Total running time of the script:** ( 0 minutes 48.741 seconds)
+ **Total running time of the script:** ( 0 minutes 49.185 seconds)
.. _sphx_glr_download_advanced_coding_ddpg.py: