@@ -68,17 +68,28 @@
 import torch
 import torch.nn as nn
 import torch.optim as optim
-import torch.autograd as autograd
 import torch.nn.functional as F
+from torch.autograd import Variable
 import torchvision.transforms as T
 
+
 env = gym.make('CartPole-v0').unwrapped
 
+# set up matplotlib
 is_ipython = 'inline' in matplotlib.get_backend()
 if is_ipython:
     from IPython import display
 
 plt.ion()
+
+# if gpu is to be used
+use_cuda = torch.cuda.is_available()
+FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
+LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
+ByteTensor = torch.cuda.ByteTensor if use_cuda else torch.ByteTensor
+Tensor = FloatTensor
+
+
 ######################################################################
 # Replay Memory
 # -------------
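A note on the typed-tensor aliases added in the hunk above: binding FloatTensor, LongTensor, and ByteTensor once at import time lets every later tensor construction be device-agnostic, instead of guarding each call site with a CUDA check. A minimal standalone sketch of the pattern (pre-0.4 PyTorch API, matching the tutorial; the shape is arbitrary):

import torch

use_cuda = torch.cuda.is_available()
FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor

# The same line allocates on the GPU when one is available, on the CPU otherwise:
batch = FloatTensor(32, 4).zero_()
print(batch.is_cuda)  # True only when a GPU was found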
@@ -258,14 +269,14 @@ def get_screen():
     # Convert to float, rescale, convert to torch tensor
     # (this doesn't require a copy)
     screen = np.ascontiguousarray(screen, dtype=np.float32) / 255
-    screen = torch.from_numpy(screen)
+    screen = torch.from_numpy(screen).type(Tensor)
     # Resize, and add a batch dimension (BCHW)
     return resize(screen).unsqueeze(0)
 
 env.reset()
 plt.figure()
-plt.imshow(get_screen().squeeze(0).permute(
-    1, 2, 0).numpy(), interpolation='none')
+plt.imshow(get_screen().cpu().squeeze(0).permute(1, 2, 0).numpy(),
+           interpolation='none')
 plt.title('Example extracted screen')
 plt.show()
 
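The .cpu() inserted before .numpy() above matters once get_screen() can return a CUDA tensor: NumPy arrays can only view host memory, so calling .numpy() on a GPU tensor raises an error. The safe idiom, sketched standalone (t stands in for any tensor of unknown device):

import torch

t = torch.rand(3, 40, 80)
if torch.cuda.is_available():
    t = t.cuda()
# t.numpy() would fail if t lives on the GPU; hop through host memory first.
# .cpu() returns the tensor unchanged when it already lives on the CPU.
arr = t.cpu().numpy()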
@@ -300,22 +311,14 @@ def get_screen():
 EPS_START = 0.9
 EPS_END = 0.05
 EPS_DECAY = 200
-USE_CUDA = torch.cuda.is_available()
 
 model = DQN()
-memory = ReplayMemory(10000)
-optimizer = optim.RMSprop(model.parameters())
 
-if USE_CUDA:
+if use_cuda:
     model.cuda()
 
-
-class Variable(autograd.Variable):
-
-    def __init__(self, data, *args, **kwargs):
-        if USE_CUDA:
-            data = data.cuda()
-        super(Variable, self).__init__(data, *args, **kwargs)
+optimizer = optim.RMSprop(model.parameters())
+memory = ReplayMemory(10000)
 
 
 steps_done = 0
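The custom Variable subclass removed above moved its data to the GPU implicitly inside the constructor; with the module-level tensor-type aliases, that conversion is written out at each call site instead, which keeps device placement visible. A minimal sketch of the replacement pattern (pre-0.4 API as used throughout this tutorial; the state shape is an arbitrary stand-in for a screen batch):

import torch
from torch.autograd import Variable

use_cuda = torch.cuda.is_available()
FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor

state = torch.rand(1, 3, 40, 80)  # a fake screen batch
# .type(FloatTensor) does the cpu-to-gpu move (when a GPU is present)
# that the old Variable subclass hid in its constructor:
v = Variable(state, volatile=True).type(FloatTensor)
print(v.data.is_cuda)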
@@ -328,9 +331,10 @@ def select_action(state):
         math.exp(-1. * steps_done / EPS_DECAY)
     steps_done += 1
     if sample > eps_threshold:
-        return model(Variable(state, volatile=True)).data.max(1)[1].cpu()
+        return model(
+            Variable(state, volatile=True).type(FloatTensor)).data.max(1)[1]
     else:
-        return torch.LongTensor([[random.randrange(2)]])
+        return LongTensor([[random.randrange(2)]])
 
 
 episode_durations = []
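In select_action above, the math.exp(...) context line completes the tutorial's annealing schedule, eps_threshold = EPS_END + (EPS_START - EPS_END) * math.exp(-1. * steps_done / EPS_DECAY), so the exploration rate decays exponentially from EPS_START toward the EPS_END floor with time constant EPS_DECAY. A quick standalone check of the constants used in the diff:

import math

EPS_START, EPS_END, EPS_DECAY = 0.9, 0.05, 200

def eps_threshold(steps_done):
    # exponential decay from EPS_START toward the EPS_END floor
    return EPS_END + (EPS_START - EPS_END) * math.exp(-1. * steps_done / EPS_DECAY)

print(eps_threshold(0))     # 0.9 at the first step
print(eps_threshold(200))   # ~0.363 after one time constant
print(eps_threshold(2000))  # ~0.05: almost pure exploitation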
@@ -339,7 +343,7 @@ def select_action(state):
 def plot_durations():
     plt.figure(2)
     plt.clf()
-    durations_t = torch.Tensor(episode_durations)
+    durations_t = torch.FloatTensor(episode_durations)
     plt.title('Training...')
     plt.xlabel('Episode')
     plt.ylabel('Duration')
@@ -370,6 +374,7 @@ def plot_durations():
 
 last_sync = 0
 
+
 def optimize_model():
     global last_sync
     if len(memory) < BATCH_SIZE:
@@ -380,10 +385,9 @@ def optimize_model():
     batch = Transition(*zip(*transitions))
 
     # Compute a mask of non-final states and concatenate the batch elements
-    non_final_mask = torch.ByteTensor(
-        tuple(map(lambda s: s is not None, batch.next_state)))
-    if USE_CUDA:
-        non_final_mask = non_final_mask.cuda()
+    non_final_mask = ByteTensor(tuple(map(lambda s: s is not None,
+                                          batch.next_state)))
+
     # We don't want to backprop through the expected action values and volatile
     # will save us from temporarily changing the model parameters'
     # requires_grad to False!
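The comment above relies on pre-0.4 autograd semantics: marking any input Variable with volatile=True propagates through every operation in the forward pass, so no graph is built and no gradients are tracked for the model's parameters, without having to flip each parameter's requires_grad flag. A minimal illustration (pre-0.4 API, matching the tutorial; PyTorch 0.4+ replaced this mechanism with the torch.no_grad() context manager):

import torch
from torch.autograd import Variable

w = Variable(torch.rand(4, 2), requires_grad=True)  # stands in for model weights
x = Variable(torch.rand(1, 4), volatile=True)       # inference-only input
y = x.mm(w)
print(y.volatile)        # True: volatility infects the whole result
print(y.requires_grad)   # False: no graph, so no backprop through w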
@@ -440,7 +444,7 @@ def optimize_model():
         # Select and perform an action
         action = select_action(state)
         _, reward, done, _ = env.step(action[0, 0])
-        reward = torch.Tensor([reward])
+        reward = Tensor([reward])
 
         # Observe new state
         last_screen = current_screen
@@ -463,6 +467,7 @@ def optimize_model():
             plot_durations()
             break
 
+print('Complete')
 env.close()
 plt.ioff()
 plt.show()