Merge pull request #67 from chsasank/patch-rl

chsasank · web-flow · commit f71137c28326 · 2017-04-14T22:44:42.000+05:30
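Patch RL tutorial: unwrap the CartPole env, turn on interactive matplotlib plotting with titled figures, and close the env once training finishes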
diff --git a/intermediate_source/reinforcement_q_learning.py b/intermediate_source/reinforcement_q_learning.py
@@ -72,13 +72,13 @@
 import torch.nn.functional as F
 import torchvision.transforms as T
 
-env = gym.make('CartPole-v0')
+env = gym.make('CartPole-v0').unwrapped
 
 is_ipython = 'inline' in matplotlib.get_backend()
 if is_ipython:
     from IPython import display
 
-
+plt.ion()
 ######################################################################
 # Replay Memory
 # -------------
@@ -263,8 +263,10 @@ def get_screen():
     return resize(screen).unsqueeze(0)
 
 env.reset()
+plt.figure()
 plt.imshow(get_screen().squeeze(0).permute(
     1, 2, 0).numpy(), interpolation='none')
+plt.title('Example extracted screen')
 plt.show()
 
 
@@ -335,9 +337,10 @@ def select_action(state):
 
 
 def plot_durations():
-    plt.figure(1)
+    plt.figure(2)
     plt.clf()
     durations_t = torch.Tensor(episode_durations)
+    plt.title('Training...')
     plt.xlabel('Episode')
     plt.ylabel('Duration')
     plt.plot(durations_t.numpy())
@@ -367,7 +370,6 @@ def plot_durations():
 
 last_sync = 0
 
-
 def optimize_model():
     global last_sync
     if len(memory) < BATCH_SIZE:
@@ -456,8 +458,11 @@ def optimize_model():
 
         # Perform one step of the optimization (on the target network)
         optimize_model()
-
         if done:
             episode_durations.append(t + 1)
             plot_durations()
             break
+
+env.close()
+plt.ioff()
+plt.show()