Update the target network weights every TARGET_UPDATE timesteps within an episode, instead of every TARGET_UPDATE episodes

Mdhvince · Mdhvince · commit d4b1ee1b5a27 · 2022-11-02T22:48:47.000+01:00
diff --git a/intermediate_source/reinforcement_q_learning.py b/intermediate_source/reinforcement_q_learning.py
@@ -492,9 +492,10 @@ def optimize_model():
             episode_durations.append(t + 1)
             plot_durations()
             break
-    # Update the target network, copying all weights and biases in DQN
-    if i_episode % TARGET_UPDATE == 0:
-        target_net.load_state_dict(policy_net.state_dict())
+
+        # Update the target network, copying all weights and biases in DQN
+        if t % TARGET_UPDATE == 0:
+            target_net.load_state_dict(policy_net.state_dict())
 
 print('Complete')
 env.render()