From 243ddd0309f8c8b9ac67cc823fe6bfd0fc5e3b2f Mon Sep 17 00:00:00 2001
From: Sasank Chilamkurthy
Date: Tue, 11 Apr 2017 22:54:37 +0530
Subject: [PATCH 1/2] Better LR scheduler for TL tutorial

---
 beginner_source/transfer_learning_tutorial.py | 92 +++++++++----------
 1 file changed, 45 insertions(+), 47 deletions(-)

diff --git a/beginner_source/transfer_learning_tutorial.py b/beginner_source/transfer_learning_tutorial.py
index a81f7bbda52..9c5038bb1fb 100644
--- a/beginner_source/transfer_learning_tutorial.py
+++ b/beginner_source/transfer_learning_tutorial.py
@@ -33,6 +33,8 @@
 # License: BSD
 # Author: Sasank Chilamkurthy

+from __future__ import print_function, division
+
 import torch
 import torch.nn as nn
 import torch.optim as optim
@@ -134,13 +136,11 @@ def imshow(inp, title=None):
 # - Scheduling the learning rate
 # - Saving (deep copying) the best model
 #
-# In the following, ``optim_scheduler`` is a function which returns an ``optim.SGD``
-# object when called as ``optim_scheduler(model, epoch)``. This is useful
-# when we want to change the learning rate or restrict the parameters we
-# want to optimize.
-#
+# In the following, ``lr_scheduler(optimizer, epoch)`` is a function
+# which modifies ``optimizer`` so that the learning rate is changed
+# according to the desired schedule.

-def train_model(model, criterion, optim_scheduler, num_epochs=25):
+def train_model(model, criterion, optimizer, lr_scheduler, num_epochs=25):
     since = time.time()

     best_model = model
@@ -153,7 +153,7 @@ def train_model(model, criterion, optim_scheduler, num_epochs=25):
         # Each epoch has a training and validation phase
         for phase in ['train', 'val']:
             if phase == 'train':
-                optimizer = optim_scheduler(model, epoch)
+                optimizer = lr_scheduler(optimizer, epoch)
                 model.train(True)  # Set model to training mode
             else:
                 model.train(False)  # Set model to evaluate mode
@@ -209,6 +209,24 @@ def train_model(model, criterion, optim_scheduler, num_epochs=25):
     print('Best val Acc: {:4f}'.format(best_acc))
     return best_model

+######################################################################
+# Learning rate scheduler
+# ^^^^^^^^^^^^^^^^^^^^^^^
+# Let's create our learning rate scheduler. We will exponentially
+# decrease the learning rate once every few epochs.
+
+def exp_lr_scheduler(optimizer, epoch, init_lr=0.001, lr_decay_epoch=7):
+    """Decay learning rate by a factor of 0.1 every lr_decay_epoch epochs."""
+    lr = init_lr * (0.1**(epoch // lr_decay_epoch))
+
+    if epoch % lr_decay_epoch == 0:
+        print('LR is set to {}'.format(lr))
+
+    for param_group in optimizer.param_groups:
+        param_group['lr'] = lr
+
+    return optimizer
+

 ######################################################################
 # Visualizing the model predictions
@@ -240,33 +258,20 @@ def visualize_model(model, num_images=5):
 # Finetuning the convnet
 # ----------------------
 #
-# First, let's create our learning rate scheduler. We will exponentially
-# decrease the learning rate once every few epochs.
-#
-
-def optim_scheduler_ft(model, epoch, init_lr=0.001, lr_decay_epoch=7):
-    lr = init_lr * (0.1**(epoch // lr_decay_epoch))
-
-    if epoch % lr_decay_epoch == 0:
-        print('LR is set to {}'.format(lr))
-
-    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
-    return optimizer
-
-
-######################################################################
 # Load a pretrained model and reset final fully connected layer.
 #

-model = models.resnet18(pretrained=True)
-num_ftrs = model.fc.in_features
-model.fc = nn.Linear(num_ftrs, 2)
+model_ft = models.resnet18(pretrained=True)
+num_ftrs = model_ft.fc.in_features
+model_ft.fc = nn.Linear(num_ftrs, 2)

 if use_gpu:
-    model = model.cuda()
+    model_ft = model_ft.cuda()

 criterion = nn.CrossEntropyLoss()

+# Observe that all parameters are being optimized
+optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

 ######################################################################
 # Train and evaluate
@@ -276,12 +281,13 @@ def optim_scheduler_ft(model, epoch, init_lr=0.001, lr_decay_epoch=7):
 # minute.
 #

-model = train_model(model, criterion, optim_scheduler_ft, num_epochs=25)
+model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
+                       num_epochs=25)

 ######################################################################
 #

-visualize_model(model)
+visualize_model(model_ft)


 ######################################################################
@@ -296,31 +302,22 @@ def optim_scheduler_ft(model, epoch, init_lr=0.001, lr_decay_epoch=7):
 #    `here `__.
 #

-model = torchvision.models.resnet18(pretrained=True)
-for param in model.parameters():
+model_conv = torchvision.models.resnet18(pretrained=True)
+for param in model_conv.parameters():
     param.requires_grad = False

 # Parameters of newly constructed modules have requires_grad=True by default
-num_ftrs = model.fc.in_features
-model.fc = nn.Linear(num_ftrs, 2)
+num_ftrs = model_conv.fc.in_features
+model_conv.fc = nn.Linear(num_ftrs, 2)

 if use_gpu:
-    model = model.cuda()
+    model_conv = model_conv.cuda()

 criterion = nn.CrossEntropyLoss()

-######################################################################
-# Let's write ``optim_scheduler``. We will use previous lr scheduler. Also
-# we need to optimize only the parameters of final FC layer.
-#
-
-def optim_scheduler_conv(model, epoch, init_lr=0.001, lr_decay_epoch=7):
-    lr = init_lr * (0.1**(epoch // lr_decay_epoch))
-    if epoch % lr_decay_epoch == 0:
-        print('LR is set to {}'.format(lr))
-
-    optimizer = optim.SGD(model.fc.parameters(), lr=lr, momentum=0.9)
-    return optimizer
+# Observe that only the parameters of the final layer are being
+# optimized, as opposed to before.
+optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)


 ######################################################################
@@ -332,12 +329,13 @@ def optim_scheduler_conv(model, epoch, init_lr=0.001, lr_decay_epoch=7):
 # network. However, forward does need to be computed.
 #

-model = train_model(model, criterion, optim_scheduler_conv)
+model_conv = train_model(model_conv, criterion, optimizer_conv,
+                         exp_lr_scheduler, num_epochs=25)

 ######################################################################
 #

-visualize_model(model)
+visualize_model(model_conv)

 plt.ioff()
 plt.show()

From 238dcd48412361f669bc41ee48995266ac168ee8 Mon Sep 17 00:00:00 2001
From: Sasank Chilamkurthy
Date: Tue, 11 Apr 2017 23:53:08 +0530
Subject: [PATCH 2/2] Better predictions visualization in TL tutorial

---
 beginner_source/transfer_learning_tutorial.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/beginner_source/transfer_learning_tutorial.py b/beginner_source/transfer_learning_tutorial.py
index 9c5038bb1fb..79199df1073 100644
--- a/beginner_source/transfer_learning_tutorial.py
+++ b/beginner_source/transfer_learning_tutorial.py
@@ -235,7 +235,10 @@ def exp_lr_scheduler(optimizer, epoch, init_lr=0.001, lr_decay_epoch=7):
 # Generic function to display predictions for a few images
 #

-def visualize_model(model, num_images=5):
+def visualize_model(model, num_images=6):
+    images_so_far = 0
+    fig = plt.figure()
+
     for i, data in enumerate(dset_loaders['val']):
         inputs, labels = data
         if use_gpu:
@@ -246,13 +249,15 @@ def visualize_model(model, num_images=6):
         outputs = model(inputs)
         _, preds = torch.max(outputs.data, 1)

-        plt.figure()
-        imshow(inputs.cpu().data[0],
-               title='pred: {}'.format(dset_classes[labels.data[0]]))
-
-        if i == num_images - 1:
-            break
+        for j in range(inputs.size()[0]):
+            images_so_far += 1
+            ax = plt.subplot(num_images//2, 2, images_so_far)
+            ax.axis('off')
+            ax.set_title('predicted: {}'.format(dset_classes[preds[j]]))
+            imshow(inputs.cpu().data[j])
+            if images_so_far == num_images:
+                return

 ######################################################################
 # Finetuning the convnet
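
Editorial note (not part of either patch): the sketch below illustrates the calling convention that PATCH 1/2 introduces -- a single ``optim.SGD`` instance is built once and ``exp_lr_scheduler`` only rewrites the learning rate in its ``param_groups`` in place each epoch, instead of constructing a fresh optimizer the way the removed ``optim_scheduler_ft`` did. One practical consequence is that optimizer state such as momentum buffers now survives across epochs. The ``nn.Linear`` stand-in model and the 21-epoch loop are illustrative assumptions only.

import torch.nn as nn
import torch.optim as optim


def exp_lr_scheduler(optimizer, epoch, init_lr=0.001, lr_decay_epoch=7):
    """Decay the learning rate by a factor of 0.1 every lr_decay_epoch epochs."""
    lr = init_lr * (0.1**(epoch // lr_decay_epoch))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return optimizer


model = nn.Linear(10, 2)  # stand-in for the tutorial's resnet18
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)  # built once

for epoch in range(21):
    optimizer = exp_lr_scheduler(optimizer, epoch)  # mutates the lr in place
    # epochs 0-6 run at 1e-3, epochs 7-13 at 1e-4, epochs 14-20 at 1e-5

print(optimizer.param_groups[0]['lr'])  # ~1e-05 after 21 epochs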
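
Editorial note (not part of either patch): the fixed-feature-extractor section relies on the claim that, with ``requires_grad`` set to ``False``, gradients are not computed for the frozen parameters while the newly constructed ``fc`` layer still receives them. A minimal check of that behaviour on a tiny stand-in model is sketched below; it is written against a recent PyTorch in which plain tensors carry autograd state, whereas the 2017-era tutorial wraps its data in ``Variable`` -- the freezing behaviour is the same.

import torch
import torch.nn as nn

backbone = nn.Linear(8, 4)       # stand-in for the pretrained resnet18 layers
for param in backbone.parameters():
    param.requires_grad = False  # freeze, as the tutorial does

fc = nn.Linear(4, 2)             # newly constructed head, trainable by default

out = fc(backbone(torch.randn(3, 8)))
out.sum().backward()

print([p.grad for p in backbone.parameters()])         # [None, None] -> skipped
print([tuple(p.grad.shape) for p in fc.parameters()])  # [(2, 4), (2,)] -> computed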
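
Editorial note (not part of either patch): PATCH 2/2 replaces one figure per prediction with a single ``num_images//2`` x 2 grid filled through ``plt.subplot``, stopping once ``num_images`` panels are drawn. The layout logic on its own, with random arrays standing in for the dataset images, looks roughly like this:

import matplotlib.pyplot as plt
import numpy as np

num_images = 6
plt.figure()

for images_so_far in range(1, num_images + 1):
    ax = plt.subplot(num_images // 2, 2, images_so_far)  # 3 rows x 2 columns
    ax.axis('off')
    ax.set_title('panel {}'.format(images_so_far))       # 'predicted: ...' in the patch
    ax.imshow(np.random.rand(64, 64, 3))                 # stand-in image

plt.show()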