Update transfer learning tutorial #62

Merged 2 commits on Apr 11, 2017

beginner_source/transfer_learning_tutorial.py (111 changes: 57 additions & 54 deletions)
@@ -33,6 +33,8 @@
# License: BSD
# Author: Sasank Chilamkurthy

+from __future__ import print_function, division
+
import torch
import torch.nn as nn
import torch.optim as optim
@@ -134,13 +136,11 @@ def imshow(inp, title=None):
# - Scheduling the learning rate
# - Saving (deep copying) the best model
#
-# In the following, ``optim_scheduler`` is a function which returns an ``optim.SGD``
-# object when called as ``optim_scheduler(model, epoch)``. This is useful
-# when we want to change the learning rate or restrict the parameters we
-# want to optimize.
-#
+# In the following, the parameter ``lr_scheduler(optimizer, epoch)``
+# is a function which modifies ``optimizer`` so that the learning
+# rate is changed according to the desired schedule.

-def train_model(model, criterion, optim_scheduler, num_epochs=25):
+def train_model(model, criterion, optimizer, lr_scheduler, num_epochs=25):
since = time.time()

best_model = model
@@ -153,7 +153,7 @@ def train_model(model, criterion, optim_scheduler, num_epochs=25):
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
-                optimizer = optim_scheduler(model, epoch)
+                optimizer = lr_scheduler(optimizer, epoch)
model.train(True) # Set model to training mode
else:
model.train(False) # Set model to evaluate mode
@@ -209,6 +209,24 @@ def train_model(model, criterion, optim_scheduler, num_epochs=25):
print('Best val Acc: {:4f}'.format(best_acc))
return best_model

+######################################################################
+# Learning rate scheduler
+# ^^^^^^^^^^^^^^^^^^^^^^^
+# Let's create our learning rate scheduler. We will exponentially
+# decrease the learning rate once every few epochs.
+
+def exp_lr_scheduler(optimizer, epoch, init_lr=0.001, lr_decay_epoch=7):
+    """Decay learning rate by a factor of 0.1 every lr_decay_epoch epochs."""
+    lr = init_lr * (0.1**(epoch // lr_decay_epoch))
+
+    if epoch % lr_decay_epoch == 0:
+        print('LR is set to {}'.format(lr))
+
+    for param_group in optimizer.param_groups:
+        param_group['lr'] = lr
+
+    return optimizer
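
For reference, here is a small sketch (not part of the diff) of how the decayed rate evolves when ``exp_lr_scheduler`` is called once per epoch with the defaults above; the ``dummy`` module and the epoch list are made up purely for illustration:

import torch.nn as nn
import torch.optim as optim

# Hypothetical stand-in module, only needed so we have an optimizer to pass in.
dummy = nn.Linear(4, 2)
optimizer = optim.SGD(dummy.parameters(), lr=0.001, momentum=0.9)

for epoch in [0, 6, 7, 13, 14]:
    optimizer = exp_lr_scheduler(optimizer, epoch)  # the function defined above
    print(epoch, optimizer.param_groups[0]['lr'])
# The rate stays at 0.001 for epochs 0-6, drops to 0.0001 at epoch 7 and to
# 1e-05 at epoch 14, i.e. 0.001 * 0.1 ** (epoch // 7).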


######################################################################
# Visualizing the model predictions
@@ -217,7 +235,10 @@ def train_model(model, criterion, optim_scheduler, num_epochs=25):
# Generic function to display predictions for a few images
#

-def visualize_model(model, num_images=5):
+def visualize_model(model, num_images=6):
+    images_so_far = 0
+    fig = plt.figure()
+
for i, data in enumerate(dset_loaders['val']):
inputs, labels = data
if use_gpu:
@@ -228,45 +249,34 @@ def visualize_model(model, num_images=5):
outputs = model(inputs)
_, preds = torch.max(outputs.data, 1)

-        plt.figure()
-        imshow(inputs.cpu().data[0],
-               title='pred: {}'.format(dset_classes[labels.data[0]]))
-
-        if i == num_images - 1:
-            break
+        for j in range(inputs.size()[0]):
+            images_so_far += 1
+            ax = plt.subplot(num_images//2, 2, images_so_far)
+            ax.axis('off')
+            ax.set_title('predicted: {}'.format(dset_classes[labels.data[j]]))
+            imshow(inputs.cpu().data[j])
+
+            if images_so_far == num_images:
+                return
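
As a side note on the layout above (a sketch, not part of the PR): with the default ``num_images=6``, ``plt.subplot(num_images//2, 2, images_so_far)`` fills a 3-by-2 grid cell by cell, for example:

import matplotlib.pyplot as plt

fig = plt.figure()
for k in range(1, 7):                # images_so_far runs from 1 to num_images
    ax = plt.subplot(6 // 2, 2, k)   # 3 rows x 2 columns, k-th cell
    ax.axis('off')
    ax.set_title('cell {}'.format(k))
plt.show()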

######################################################################
# Finetuning the convnet
# ----------------------
#
-# First, let's create our learning rate scheduler. We will exponentially
-# decrease the learning rate once every few epochs.
-#
-
-def optim_scheduler_ft(model, epoch, init_lr=0.001, lr_decay_epoch=7):
-    lr = init_lr * (0.1**(epoch // lr_decay_epoch))
-
-    if epoch % lr_decay_epoch == 0:
-        print('LR is set to {}'.format(lr))
-
-    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
-    return optimizer


######################################################################
# Load a pretrained model and reset final fully connected layer.
#

-model = models.resnet18(pretrained=True)
-num_ftrs = model.fc.in_features
-model.fc = nn.Linear(num_ftrs, 2)
+model_ft = models.resnet18(pretrained=True)
+num_ftrs = model_ft.fc.in_features
+model_ft.fc = nn.Linear(num_ftrs, 2)

if use_gpu:
-    model = model.cuda()
+    model_ft = model_ft.cuda()

criterion = nn.CrossEntropyLoss()

+# Observe that all parameters are being optimized
+optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
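
As a quick sanity check (a hypothetical snippet, not part of the diff), the replaced head now maps ``num_ftrs`` ResNet features to 2 class scores, so a random batch passed through ``model_ft.fc`` comes out with shape ``(N, 2)``; the ``Variable`` wrapper matches the pre-0.4 API used elsewhere in this tutorial:

import torch
from torch.autograd import Variable  # pre-0.4 style, as in the rest of the tutorial

x = Variable(torch.randn(4, num_ftrs))  # num_ftrs == model_ft.fc.in_features above
if use_gpu:
    x = x.cuda()
print(model_ft.fc(x).size())  # torch.Size([4, 2]) -- one score per class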

######################################################################
# Train and evaluate
@@ -276,12 +286,13 @@ def optim_scheduler_ft(model, epoch, init_lr=0.001, lr_decay_epoch=7):
# minute.
#

-model = train_model(model, criterion, optim_scheduler_ft, num_epochs=25)
+model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
+                       num_epochs=25)

######################################################################
#

-visualize_model(model)
+visualize_model(model_ft)


######################################################################
@@ -296,31 +307,22 @@ def optim_scheduler_ft(model, epoch, init_lr=0.001, lr_decay_epoch=7):
# `here <http://pytorch.org/docs/notes/autograd.html#excluding-subgraphs-from-backward>`__.
#

-model = torchvision.models.resnet18(pretrained=True)
-for param in model.parameters():
+model_conv = torchvision.models.resnet18(pretrained=True)
+for param in model_conv.parameters():
param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default
-num_ftrs = model.fc.in_features
-model.fc = nn.Linear(num_ftrs, 2)
+num_ftrs = model_conv.fc.in_features
+model_conv.fc = nn.Linear(num_ftrs, 2)

if use_gpu:
-    model = model.cuda()
+    model_conv = model_conv.cuda()

criterion = nn.CrossEntropyLoss()
-######################################################################
-# Let's write ``optim_scheduler``. We will use previous lr scheduler. Also
-# we need to optimize only the parameters of final FC layer.
-#
-
-def optim_scheduler_conv(model, epoch, init_lr=0.001, lr_decay_epoch=7):
-    lr = init_lr * (0.1**(epoch // lr_decay_epoch))
-
-    if epoch % lr_decay_epoch == 0:
-        print('LR is set to {}'.format(lr))
-
-    optimizer = optim.SGD(model.fc.parameters(), lr=lr, momentum=0.9)
-    return optimizer
+# Observe that only parameters of the final layer are being optimized, as
+# opposed to before.
+optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)
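
To make the freezing concrete (a sketch, not part of the diff), one can count which parameters still require gradients after the loop above; only the weight and bias of the newly constructed ``fc`` layer should remain trainable:

# Hypothetical check: everything except the new final layer is frozen.
trainable = [p for p in model_conv.parameters() if p.requires_grad]
frozen = [p for p in model_conv.parameters() if not p.requires_grad]
print(len(trainable), len(frozen))  # expect 2 trainable tensors (fc.weight, fc.bias)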


######################################################################
@@ -332,12 +334,13 @@ def optim_scheduler_conv(model, epoch, init_lr=0.001, lr_decay_epoch=7):
# network. However, forward does need to be computed.
#

-model = train_model(model, criterion, optim_scheduler_conv)
+model_conv = train_model(model_conv, criterion, optimizer_conv,
+                         exp_lr_scheduler, num_epochs=25)

######################################################################
#

-visualize_model(model)
+visualize_model(model_conv)

plt.ioff()
plt.show()