diff --git a/Makefile b/Makefile index 62557cd1f4f..ec18f71d1f4 100644 --- a/Makefile +++ b/Makefile @@ -20,16 +20,27 @@ help: %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -docs: - wget -N https://download.pytorch.org/tutorial/data.zip +download: + # transfer learning tutorial data wget -N https://download.pytorch.org/tutorial/hymenoptera_data.zip unzip -o hymenoptera_data.zip -d beginner_source + + # nlp tutorial data + wget -N https://download.pytorch.org/tutorial/data.zip unzip -o data.zip -d intermediate_source + + # data loader tutorial + wget -N https://download.pytorch.org/tutorial/faces.zip + unzip -o faces.zip -d beginner_source + + # neural style images rm -rf advanced_source/images/ cp -r _static/img/neural-style/ advanced_source/images/ - rm -rf docs + +docs: + make download make html + rm -rf docs cp -r $(BUILDDIR)/html docs touch docs/.nojekyll diff --git a/_static/img/landmarked_face2.png b/_static/img/landmarked_face2.png new file mode 100644 index 00000000000..2faa58b37f2 Binary files /dev/null and b/_static/img/landmarked_face2.png differ diff --git a/beginner_source/data_loading_tutorial.py b/beginner_source/data_loading_tutorial.py new file mode 100644 index 00000000000..c32eef6510c --- /dev/null +++ b/beginner_source/data_loading_tutorial.py @@ -0,0 +1,445 @@ +# -*- coding: utf-8 -*- +""" +Data Loading and Processing Tutorial +==================================== + +A lot of effort in solving any machine learning problem goes in to +preparing the data. PyTorch provides many tools to make data loading +easy and hopefully, to make your code more readable. In this tutorial, +we will see how to load and preprocess/augment data from a non trivial +dataset. + +To run this tutorial, please make sure the following pacakges are +installed: + +- ``scikit-image``: For image io and transforms +- ``pandas``: For easier csv parsing + +""" + +from __future__ import print_function, division +import os +import torch +import pandas as pd +from skimage import io, transform +import numpy as np +import matplotlib.pyplot as plt +from torch.utils.data import Dataset, DataLoader +from torchvision import transforms, utils + +# Ignore warnings +import warnings +warnings.filterwarnings("ignore") + +plt.ion() # interactive mode + +###################################################################### +# The dataset we are going to deal with is that of facial pose. +# This means that a face is annotated like this: +# +# .. figure:: /_static/img/landmarked_face2.png +# :width: 400 +# +# Over all, 68 different landmark points are annotated for each face. +# +# .. note:: +# Download the dataset from `here `_ +# so that the images are in a directory named 'faces/'. +# This dataset was actually +# generated by applying excellent `dlib's pose +# estimation `__ +# on a few images from imagenet tagged as 'face'. +# +# Dataset comes with a csv file with annotations which looks like this: +# +# :: +# +# image_name,part_0_x,part_0_y,part_1_x,part_1_y,part_2_x, ... ,part_67_x,part_67_y +# 0805personali01.jpg,27,83,27,98, ... 84,134 +# 1084239450_e76e00b7e7.jpg,70,236,71,257, ... ,128,312 +# +# Let's quickly read the CSV and get the annotations in an (N, 2) array where N +# is the number of landmarks. +# + +landmarks_frame = pd.read_csv('faces/face_landmarks.csv') + +n = 65 +img_name = landmarks_frame.ix[n, 0] +landmarks = landmarks_frame.ix[n, 1:].as_matrix().astype('float') +landmarks = landmarks.reshape(-1, 2) + +print('Image name: {}'.format(img_name)) +print('Landmarks shape: {}'.format(landmarks.shape)) +print('First 4 Landmarks: {}'.format(landmarks[:4])) + + +###################################################################### +# Let's write a simple helper function to show an image and its landmarks +# and use it to show a sample. +# + +def show_landmarks(image, landmarks): + """Show image with landmarks""" + plt.imshow(image) + plt.scatter(landmarks[:, 0], landmarks[:, 1], s=10, marker='.', c='r') + plt.pause(0.001) # pause a bit so that plots are updated + +plt.figure() +show_landmarks(io.imread(os.path.join('faces/', img_name)), + landmarks) +plt.show() + + +###################################################################### +# Dataset class +# ------------- +# +# ``torch.utils.data.Dataset`` is an abstract class representing a +# dataset. +# Your custom dataset should inherit ``Dataset`` and override the following +# methods: +# +# - ``__len__`` so that ``len(dataset)`` returns the size of the dataset. +# - ``__getitem__`` to support the indexing such that ``dataset[i]`` can +# be used to get :math:`i`\ th sample +# +# Let's create a dataset class for our face landmarks dataset. We will +# read the csv in ``__init__`` but leave the reading of images to +# ``__getitem__``. This is memory efficient because all the images are not +# stored in the memory at once but read as required. +# +# Sample of our dataset will be a dict +# ``{'image': image, 'landmarks': landmarks}``. Our datset will take an +# optional argument ``transform`` so that any required processing can be +# applied on the sample. We will see the usefulness of ``transform`` in the +# next section. +# + +class FaceLandmarksDataset(Dataset): + """Face Landmarks dataset.""" + + def __init__(self, csv_file, root_dir, transform=None): + """ + Args: + csv_file (string): Path to the csv file with annotations. + root_dir (string): Directory with all the images. + transform (callable, optional): Optional transform to be applied + on a sample. + """ + self.landmarks_frame = pd.read_csv(csv_file) + self.root_dir = root_dir + self.transform = transform + + def __len__(self): + return len(self.landmarks_frame) + + def __getitem__(self, idx): + img_name = os.path.join(self.root_dir, self.landmarks_frame.ix[idx, 0]) + image = io.imread(img_name) + landmarks = landmarks_frame.ix[idx, 1:].as_matrix().astype('float') + landmarks = landmarks.reshape(-1, 2) + sample = {'image': image, 'landmarks': landmarks} + + if self.transform: + sample = self.transform(sample) + + return sample + + +###################################################################### +# Let's instantiate this class and iterate through the data samples. We +# will print the sizes of first 4 samples and show their landmarks. +# + +face_dataset = FaceLandmarksDataset(csv_file='faces/face_landmarks.csv', + root_dir='faces/') + +fig = plt.figure() + +for i in range(len(face_dataset)): + sample = face_dataset[i] + + print(i, sample['image'].shape, sample['landmarks'].shape) + + ax = plt.subplot(1, 4, i + 1) + plt.tight_layout() + ax.set_title('Sample #{}'.format(i)) + ax.axis('off') + show_landmarks(**sample) + + if i == 3: + plt.show() + break + + +###################################################################### +# Transforms +# ---------- +# +# One issue we can see from the above is that the samples are not of the +# same size. Most neural networks expect the images of a fixed size. +# Therefore, we will need to write some prepocessing code. +# Let's create three transforms: +# +# - ``Rescale``: to scale the image +# - ``RandomCrop``: to crop from image randomly. This is data +# augmentation. +# - ``ToTensor``: to convert the numpy images to torch images (we need to +# swap axes). +# +# We will write them as callable classes instead of simple functions so +# that parameters of the transform need not be passed everytime it's +# called. For this, we just need to implement ``__call__`` method and +# if required, ``__init__`` method. We can then use a transform like this: +# +# :: +# +# tsfm = Transform(params) +# transformed_sample = tsfm(sample) +# +# Observe below how these transforms had to be applied both on the image and +# landmarks. +# + +class Rescale(object): + """Rescale the image in a sample to a given size. + + Args: + output_size (tuple or tuple): Desired output size. If tuple, output is + matched to output_size. If int, smaller of image edges is matched + to output_size keeping aspect ratio the same. + """ + + def __init__(self, output_size): + assert isinstance(output_size, (int, tuple)) + self.output_size = output_size + + def __call__(self, sample): + image, landmarks = sample['image'], sample['landmarks'] + + h, w = image.shape[:2] + if isinstance(self.output_size, int): + if h > w: + new_h, new_w = self.output_size * h / w, self.output_size + else: + new_h, new_w = self.output_size, self.output_size * w / h + else: + new_h, new_w = self.output_size + + new_h, new_w = int(new_h), int(new_w) + + img = transform.resize(image, (new_h, new_w)) + + # h and w are swapped for landmarks because for images, + # x and y axes are axis 1 and 0 respectively + landmarks = landmarks * [new_w / w, new_h / h] + + return {'image': img, 'landmarks': landmarks} + + +class RandomCrop(object): + """Crop randomly the image in a sample. + + Args: + output_size (tuple or int): Desired output size. If int, square crop + is made. + """ + + def __init__(self, output_size): + assert isinstance(output_size, (int, tuple)) + if isinstance(output_size, int): + self.output_size = (output_size, output_size) + else: + assert len(output_size) == 2 + self.output_size = output_size + + def __call__(self, sample): + image, landmarks = sample['image'], sample['landmarks'] + + h, w = image.shape[:2] + new_h, new_w = self.output_size + + top = np.random.randint(0, h - new_h) + left = np.random.randint(0, w - new_w) + + image = image[top: top + new_h, + left: left + new_w] + + landmarks = landmarks - [left, top] + + return {'image': image, 'landmarks': landmarks} + + +class ToTensor(object): + """Convert ndarrays in sample to Tensors.""" + + def __call__(self, sample): + image, landmarks = sample['image'], sample['landmarks'] + + # swap color axis because + # numpy image: H x W x C + # torch image: C X H X W + image = image.transpose((2, 0, 1)) + return {'image': torch.from_numpy(image), + 'landmarks': torch.from_numpy(landmarks)} + + +###################################################################### +# Compose transforms +# ~~~~~~~~~~~~~~~~~~ +# +# Now, we apply the transforms on an sample. +# +# Let's say we want to rescale the shorter side of the image to 256 and +# then randomly crop a square of size 224 from it. i.e, we want to compose +# ``Rescale`` and ``RandomCrop`` transforms. +# ``torchvision.transforms.Compose`` is a simple callable class which allows us +# to do this. +# + +scale = Rescale(256) +crop = RandomCrop(128) +composed = transforms.Compose([Rescale(256), + RandomCrop(224)]) + +# Apply each of the above transforms on sample. +fig = plt.figure() +sample = face_dataset[65] +for i, tsfrm in enumerate([scale, crop, composed]): + transformed_sample = tsfrm(sample) + + ax = plt.subplot(1, 3, i + 1) + plt.tight_layout() + ax.set_title(type(tsfrm).__name__) + show_landmarks(**transformed_sample) + +plt.show() + + +###################################################################### +# Iterating through the dataset +# ----------------------------- +# +# Let's put this all together to create a dataset with composed +# transforms. +# To summarize, every time this dataset is sampled: +# +# - An image is read from the file on the fly +# - Transforms are applied on the read image +# - Since one of the transforms is random, data is augmentated on +# sampling +# +# We can iterate over the created dataset with a ``for i in range`` +# loop as before. +# + +transformed_dataset = FaceLandmarksDataset(csv_file='faces/face_landmarks.csv', + root_dir='faces/', + transform=transforms.Compose([ + Rescale(256), + RandomCrop(224), + ToTensor() + ])) + +for i in range(len(transformed_dataset)): + sample = transformed_dataset[i] + + print(i, sample['image'].size(), sample['landmarks'].size()) + + if i == 3: + break + + +###################################################################### +# However, we are losing a lot of features by using a simple ``for`` loop to +# iterate over the data. In particular, we are missing out on: +# +# - Batching the data +# - Shuffling the data +# - Load the data in parallel using ``multiprocessing`` workers. +# +# ``torch.utils.data.DataLoader`` is an iterator which provides all these +# features. Parameters used below should be clear. One parameter of +# interest is ``collate_fn``. You can specify how exactly the samples need +# to be batched using ``collate_fn``. However, default collate should work +# fine for most use cases. +# + +dataloader = DataLoader(transformed_dataset, batch_size=4, + shuffle=True, num_workers=4) + + +# Helper function to show a batch +def show_landmarks_batch(sample_batched): + """Show image with landmarks for a batch of samples.""" + images_batch, landmarks_batch = \ + sample_batched['image'], sample_batched['landmarks'] + batch_size = len(images_batch) + im_size = images_batch.size(2) + + grid = utils.make_grid(images_batch) + plt.imshow(grid.numpy().transpose((1, 2, 0))) + + for i in range(batch_size): + plt.scatter(landmarks_batch[i, :, 0].numpy() + i * im_size, + landmarks_batch[i, :, 1].numpy(), + s=10, marker='.', c='r') + + plt.title('Batch from dataloader') + +for i_batch, sample_batched in enumerate(dataloader): + print(i_batch, sample_batched['image'].size(), + sample_batched['landmarks'].size()) + + # observe 4th batch and stop. + if i_batch == 3: + plt.figure() + show_landmarks_batch(sample_batched) + plt.axis('off') + plt.ioff() + plt.show() + break + +###################################################################### +# Afterword: torchvision +# ---------------------- +# +# In this tutorial, we have seen how to write and use datasets, transforms +# and dataloader. ``torchvision`` package provides some common datasets and +# transforms. You might not even have to write custom classes. One of the +# more generic datasets available in torchvision is ``ImageFolder``. +# It assumes that images are organized in the following way: :: +# +# root/ants/xxx.png +# root/ants/xxy.jpeg +# root/ants/xxz.png +# . +# . +# . +# root/bees/123.jpg +# root/bees/nsdf3.png +# root/bees/asd932_.png +# +# where 'ants', 'bees' etc. are class labels. Similarly generic transforms +# which operate on ``PIL.Image`` like ``RandomHorizontalFlip``, ``Scale``, +# are also avaiable. You can use these to write a dataloader like this: :: +# +# import torch +# from torchvision import transforms, datasets +# +# data_transform = transforms.Compose([ +# transforms.RandomSizedCrop(224), +# transforms.RandomHorizontalFlip(), +# transforms.ToTensor(), +# transforms.Normalize(mean=[0.485, 0.456, 0.406], +# std=[0.229, 0.224, 0.225]) +# ]) +# hymenoptera_dataset = datasets.ImageFolder(root='hymenoptera_data/train', +# transform=data_transform) +# dataset_loader = torch.utils.data.DataLoader(hymenoptera_dataset, +# batch_size=4, shuffle=True, +# num_workers=4) +# +# For an example with training code, please see +# :doc:`transfer_learning_tutorial`. diff --git a/index.rst b/index.rst index 72a817c3fc8..c4c43612b27 100644 --- a/index.rst +++ b/index.rst @@ -40,6 +40,8 @@ Beginner Tutorials .. galleryitem:: beginner/transfer_learning_tutorial.py +.. galleryitem:: beginner/data_loading_tutorial.py + .. customgalleryitem:: :tooltip: I am writing this tutorial to focus specifically on NLP for people who have never written code in any deep learning framework :figure: /_static/img/thumbnails/babel.jpg @@ -60,6 +62,7 @@ Beginner Tutorials beginner/former_torchies_tutorial beginner/pytorch_with_examples beginner/transfer_learning_tutorial + beginner/data_loading_tutorial beginner/deep_learning_nlp_tutorial Intermediate Tutorials