Commit a3dfc6e

Merge branch 'DQN_revise_training' of github.com:SiftingSands/tutorials into DQN_revise_training

2 parents: 1015af6 + 68e29a3

28 files changed, +1526 -59 lines

.circleci/config.yml

Lines changed: 3 additions & 2 deletions

@@ -134,21 +134,22 @@ pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults
        fi
        set -x

+       echo 'rm /opt/cache/bin/*' | docker exec -u root -i "$id" bash
        docker cp /home/circleci/project/. "$id:/var/lib/jenkins/workspace"

        export COMMAND='((echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && ./ci_build_script.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
        echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts

 pytorch_tutorial_build_worker_defaults: &pytorch_tutorial_build_worker_defaults
   environment:
-    DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
+    DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda11.6-cudnn8-py3-gcc7"
     CUDA_VERSION: "9"
   resource_class: gpu.nvidia.small
   <<: *pytorch_tutorial_build_defaults

 pytorch_tutorial_build_manager_defaults: &pytorch_tutorial_build_manager_defaults
   environment:
-    DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
+    DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda11.6-cudnn8-py3-gcc7"
   resource_class: medium
   <<: *pytorch_tutorial_build_defaults

.circleci/config.yml.in

Lines changed: 3 additions & 2 deletions

@@ -134,21 +134,22 @@ pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults
        fi
        set -x

+       echo 'rm /opt/cache/bin/*' | docker exec -u root -i "$id" bash
        docker cp /home/circleci/project/. "$id:/var/lib/jenkins/workspace"

        export COMMAND='((echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && ./ci_build_script.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
        echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts

 pytorch_tutorial_build_worker_defaults: &pytorch_tutorial_build_worker_defaults
   environment:
-    DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
+    DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda11.6-cudnn8-py3-gcc7"
     CUDA_VERSION: "9"
   resource_class: gpu.nvidia.small
   <<: *pytorch_tutorial_build_defaults

 pytorch_tutorial_build_manager_defaults: &pytorch_tutorial_build_manager_defaults
   environment:
-    DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
+    DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda11.6-cudnn8-py3-gcc7"
   resource_class: medium
   <<: *pytorch_tutorial_build_defaults
{% raw %}

.jenkins/build.sh

Lines changed: 3 additions & 4 deletions

@@ -25,9 +25,9 @@ pip install -r $DIR/../requirements.txt
 # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
 # RC Link
 # pip uninstall -y torch torchvision torchaudio torchtext
-# pip install --pre --upgrade -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext
+# pip install --pre --upgrade -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext
 # pip uninstall -y torch torchvision torchaudio torchtext
-# pip install -f https://download.pytorch.org/whl/test/cu111/torch_test.html torch torchvision torchaudio torchtext
+# pip install --pre --upgrade -f https://download.pytorch.org/whl/test/cu116/torch_test.html torch torchdata torchvision torchaudio torchtext

 # Install two language tokenizers for Translation with TorchText tutorial
 python -m spacy download en_core_web_sm

@@ -47,14 +47,13 @@ if [[ "${JOB_BASE_NAME}" == *worker_* ]]; then
 # python $DIR/remove_runnable_code.py intermediate_source/spatial_transformer_tutorial.py intermediate_source/spatial_transformer_tutorial.py || true
 # Temp remove for 1.10 release.
 # python $DIR/remove_runnable_code.py advanced_source/neural_style_tutorial.py advanced_source/neural_style_tutorial.py || true
-
+
 # TODO: Fix bugs in these tutorials to make them runnable again
 # python $DIR/remove_runnable_code.py beginner_source/audio_classifier_tutorial.py beginner_source/audio_classifier_tutorial.py || true

 # Remove runnable code from tensorboard_profiler_tutorial.py as it frequently crashes, see https://github.com/pytorch/pytorch/issues/74139
 # python $DIR/remove_runnable_code.py intermediate_source/tensorboard_profiler_tutorial.py intermediate_source/tensorboard_profiler_tutorial.py || true

-
 # Step 2: Keep certain tutorials based on file count, and remove runnable code in all other tutorials
 # IMPORTANT NOTE: We assume that each tutorial has a UNIQUE filename.
 export WORKER_ID=$(echo "${JOB_BASE_NAME}" | tr -dc '0-9')
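The WORKER_ID export strips everything but digits from the job name, so a job like "pytorch_tutorial_pr_build_worker_3" yields worker ID 3. The actual partitioning lives further down in build.sh; as a rough sketch of the idea hinted at by the "Step 2" comment (hypothetical code, not the CI script itself, with NUM_WORKERS and the slicing scheme assumed), each worker keeps every NUM_WORKERS-th tutorial from the sorted list of unique filenames and strips runnable code from the rest:

    # Hypothetical sketch of count-based sharding across CI workers.
    # NUM_WORKERS and the slicing scheme are illustrative, not taken
    # from the actual .jenkins/build.sh.
    NUM_WORKERS = 20
    worker_id = 3

    all_tutorials = sorted([
        "cifar10_tutorial.py",
        "introyt1_tutorial.py",
        "trainingyt.py",
        # ... every tutorial file, each with a unique name
    ])
    # This worker builds only its slice; the rest get runnable code removed.
    keep = all_tutorials[worker_id::NUM_WORKERS]
    print(keep)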

.jenkins/validate_tutorials_built.py

Lines changed: 1 addition & 0 deletions

@@ -50,6 +50,7 @@
     "recipes/Captum_Recipe",
     "hyperparameter_tuning_tutorial",
     "flask_rest_api_tutorial",
+    "text_to_speech_with_torchaudio",
 ]
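The hunk adds one entry to a skip-list of tutorials that the validator does not require to have built. A hypothetical sketch of how such a list might gate validation (the names SKIP and should_validate are illustrative, not from the actual script):

    # Hypothetical validator logic; only the list entries are from the diff.
    SKIP = [
        "recipes/Captum_Recipe",
        "hyperparameter_tuning_tutorial",
        "flask_rest_api_tutorial",
        "text_to_speech_with_torchaudio",
    ]

    def should_validate(tutorial_name: str) -> bool:
        # A tutorial is checked only when no skip entry matches its name.
        return not any(s in tutorial_name for s in SKIP)

    assert not should_validate("beginner/text_to_speech_with_torchaudio")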

Makefile

Lines changed: 4 additions & 0 deletions

@@ -102,6 +102,10 @@ download:
 	wget -nv -N https://download.pytorch.org/models/resnet18-5c106cde.pth -P $(DATADIR)
 	cp $(DATADIR)/resnet18-5c106cde.pth prototype_source/data/resnet18_pretrained_float.pth

+	# Download vocab for beginner_source/flava_finetuning_tutorial.py
+	wget -nv -N http://dl.fbaipublicfiles.com/pythia/data/vocab.tar.gz -P $(DATADIR)
+	tar $(TAROPTS) -xzf $(DATADIR)/vocab.tar.gz -C ./beginner_source/data/
+
 docs:
 	make download
(Three binary files changed, 108 KB, 211 KB, and 116 KB; contents not rendered.)

beginner_source/blitz/cifar10_tutorial.py

Lines changed: 2 additions & 2 deletions

@@ -105,7 +105,7 @@ def imshow(img):

 # get some random training images
 dataiter = iter(trainloader)
-images, labels = dataiter.next()
+images, labels = next(dataiter)

 # show images
 imshow(torchvision.utils.make_grid(images))

@@ -210,7 +210,7 @@ def forward(self, x):
 # Okay, first step. Let us display an image from the test set to get familiar.

 dataiter = iter(testloader)
-images, labels = dataiter.next()
+images, labels = next(dataiter)

 # print images
 imshow(torchvision.utils.make_grid(images))
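These dataiter.next() edits, and the identical ones in the introyt tutorials below, track an API change: PyTorch's DataLoader iterator implements the standard Python iterator protocol (__next__), and the legacy .next() alias has been removed in recent releases, so the built-in next() is the portable spelling. A minimal sketch of the pattern, assuming only torch is installed:

    # Minimal sketch: use the built-in next() on a DataLoader iterator.
    import torch
    from torch.utils.data import DataLoader, TensorDataset

    dataset = TensorDataset(torch.randn(8, 3, 32, 32), torch.arange(8))
    loader = DataLoader(dataset, batch_size=4)

    dataiter = iter(loader)
    images, labels = next(dataiter)      # works on any Python iterator
    # images, labels = dataiter.next()   # AttributeError on current PyTorch

Since next() works on any Python iterator, this spelling keeps the tutorials version-agnostic.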

beginner_source/deep_learning_60min_blitz.rst

Lines changed: 4 additions & 4 deletions

@@ -37,28 +37,28 @@ packages installed.
 .. grid:: 4

     .. grid-item-card:: :octicon:`file-code;1em` Tensors
-       :link: /beginner/blitz/tensor_tutorial.html
+       :link: blitz/tensor_tutorial.html

        In this tutorial, you will learn the basics of PyTorch tensors.
        +++
        :octicon:`code;1em` Code

     .. grid-item-card:: :octicon:`file-code;1em` A Gentle Introduction to torch.autograd
-       :link: /beginner/blitz/autograd_tutorial.html
+       :link: blitz/autograd_tutorial.html

        Learn about autograd.
        +++
        :octicon:`code;1em` Code

     .. grid-item-card:: :octicon:`file-code;1em` Neural Networks
-       :link: /beginner/blitz/neural_networks_tutorial.html
+       :link: blitz/neural_networks_tutorial.html

        This tutorial demonstrates how you can train neural networks in PyTorch.
        +++
        :octicon:`code;1em` Code

     .. grid-item-card:: :octicon:`file-code;1em` Training a Classifier
-       :link: /beginner/blitz/cifar10_tutorial.html
+       :link: blitz/cifar10_tutorial.html

        Learn how to train an image classifier in PyTorch by using the
        CIFAR10 dataset.

beginner_source/flava_finetuning_tutorial.py

Lines changed: 190 additions & 0 deletions

@@ -0,0 +1,190 @@
+# -*- coding: utf-8 -*-
+"""
+TorchMultimodal Tutorial: Finetuning FLAVA
+============================================
+"""
+
+######################################################################
+# Multimodal AI has recently become very popular owing to its ubiquitous
+# nature, from use cases like image captioning and visual search to more
+# recent applications like image generation from text. **TorchMultimodal
+# is a library powered by PyTorch consisting of building blocks and
+# end-to-end examples, aiming to enable and accelerate research in
+# multimodality**.
+#
+# In this tutorial, we will demonstrate how to use a **pretrained SoTA
+# model called** `FLAVA <https://arxiv.org/pdf/2112.04482.pdf>`__ **from
+# the TorchMultimodal library to finetune on a multimodal task, i.e.,
+# visual question answering** (VQA). The model consists of two unimodal
+# transformer-based encoders for text and image and a multimodal encoder
+# to combine the two embeddings. It is pretrained using contrastive,
+# image-text matching, and text, image, and multimodal masking losses.
+
+
+######################################################################
+# Installation
+# -----------------
+# We will use the TextVQA dataset and the BERT tokenizer from HuggingFace
+# for this tutorial, so you need to install datasets and transformers in
+# addition to TorchMultimodal.
+#
+# .. note::
+#
+#    When running this tutorial in Google Colab, install the required packages by
+#    creating a new cell and running the following commands:
+#
+# .. code-block::
+#
+#    !pip install torchmultimodal-nightly
+#    !pip install datasets
+#    !pip install transformers
+#
+
+######################################################################
+# Steps
+# -----
+#
+# 1. Download the vocab file of answer classes to a directory on your
+#    computer by running the following command:
+#
+# .. code-block::
+#
+#    wget http://dl.fbaipublicfiles.com/pythia/data/vocab.tar.gz
+#    tar xf vocab.tar.gz
+#
+# .. note::
+#    If you are running this tutorial in Google Colab, run these commands
+#    in a new cell and prepend them with an exclamation mark (!).
+#
+#
+# 2. For this tutorial, we treat VQA as a classification task where
+#    the inputs are images and questions (text) and the output is an
+#    answer class, so we use the downloaded vocab file of answer classes
+#    to create the answer-to-label mapping.
+#
+# We also load the `textvqa
+# dataset <https://arxiv.org/pdf/1904.08920.pdf>`__ containing 34602 training samples
+# (images, questions, and answers) from HuggingFace.
+#
+# We see there are 3997 answer classes, including a class representing
+# unknown answers.
+#
+
+with open("data/vocabs/answers_textvqa_more_than_1.txt") as f:
+    vocab = f.readlines()
+
+answer_to_idx = {}
+for idx, entry in enumerate(vocab):
+    answer_to_idx[entry.strip("\n")] = idx
+print(len(vocab))
+print(vocab[:5])
+
+from datasets import load_dataset
+dataset = load_dataset("textvqa")
+
+######################################################################
+# Let's display a sample entry from the dataset:
+#
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+idx = 5
+print("Question: ", dataset["train"][idx]["question"])
+print("Answers: ", dataset["train"][idx]["answers"])
+im = np.asarray(dataset["train"][idx]["image"].resize((500, 500)))
+plt.imshow(im)
+plt.show()
+
+
+######################################################################
+# 3. Next, we write the transform function to convert the image and text
+#    into Tensors consumable by our model:
+#
+#    - For images, we use the transforms from torchvision to convert to
+#      Tensor and resize to a uniform size.
+#    - For text, we tokenize (and pad) the question using the
+#      BertTokenizer from HuggingFace.
+#    - For answers (i.e. labels), we take the most frequently occurring
+#      answer as the label to train with:
+#

+import torch
+from torchvision import transforms
+from collections import defaultdict
+from transformers import BertTokenizer
+from functools import partial
+
+def transform(tokenizer, input):
+    batch = {}
+    image_transform = transforms.Compose([transforms.ToTensor(), transforms.Resize([224, 224])])
+    image = image_transform(input["image"][0].convert("RGB"))
+    batch["image"] = [image]
+
+    tokenized = tokenizer(input["question"], return_tensors="pt", padding="max_length", max_length=512)
+    batch.update(tokenized)
+
+    # Use the most frequently occurring answer as the training label.
+    ans_to_count = defaultdict(int)
+    for ans in input["answers"][0]:
+        ans_to_count[ans] += 1
+    max_value = max(ans_to_count, key=ans_to_count.get)
+    ans_idx = answer_to_idx.get(max_value, 0)
+    batch["answers"] = torch.as_tensor([ans_idx])
+    return batch
+
+tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", padding="max_length", max_length=512)
+transform = partial(transform, tokenizer)
+dataset.set_transform(transform)
+
+
+######################################################################
+# 4. Finally, we import the flava_model_for_classification from
+#    torchmultimodal. It loads the pretrained FLAVA checkpoint by default
+#    and includes a classification head.
+#
+# The model forward function passes the image through the visual encoder
+# and the question through the text encoder. The image and question
+# embeddings are then passed through the multimodal encoder. The final
+# embedding corresponding to the CLS token is passed through an MLP head
+# which finally gives a probability distribution over the possible
+# answers.
+#
+
+from torchmultimodal.models.flava.model import flava_model_for_classification
+model = flava_model_for_classification(num_classes=len(vocab))
+
+
+######################################################################
+# 5. We put together the dataset and model in a toy training loop to
+#    demonstrate how to train the model for 3 iterations:
+#
+
+from torch import nn
+from torch.utils.data import DataLoader
+
+BATCH_SIZE = 2
+MAX_STEPS = 3
+
+train_dataloader = DataLoader(dataset["train"], batch_size=BATCH_SIZE)
+optimizer = torch.optim.AdamW(model.parameters())
+
+epochs = 1
+for _ in range(epochs):
+    for idx, batch in enumerate(train_dataloader):
+        optimizer.zero_grad()
+        out = model(text=batch["input_ids"], image=batch["image"], labels=batch["answers"])
+        loss = out.loss
+        loss.backward()
+        optimizer.step()
+        print(f"Loss at step {idx} = {loss}")
+        if idx >= MAX_STEPS - 1:
+            break
+
+
+######################################################################
+# Conclusion
+# -------------------
+#
+# This tutorial introduced the basics of how to finetune on a
+# multimodal task using FLAVA from TorchMultimodal. Please also check out
+# other examples from the library, like
+# `MDETR <https://github.com/facebookresearch/multimodal/tree/main/torchmultimodal/models/mdetr>`__,
+# which is a multimodal model for object detection, and
+# `Omnivore <https://github.com/facebookresearch/multimodal/blob/main/torchmultimodal/models/omnivore.py>`__,
+# which is a multitask model spanning image, video, and 3D classification.
+#
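Once finetuned, the same batches can be reused for a quick qualitative check. A hedged sketch follows; it assumes the classification output exposes a logits field alongside loss (verify against the torchmultimodal version you have installed), and it reuses model, train_dataloader, and answer_to_idx from the tutorial above:

    # Hypothetical inference sketch reusing objects defined in the tutorial.
    idx_to_answer = {i: ans for ans, i in answer_to_idx.items()}

    model.eval()
    with torch.no_grad():
        batch = next(iter(train_dataloader))
        out = model(text=batch["input_ids"], image=batch["image"],
                    labels=batch["answers"])
        preds = out.logits.argmax(dim=-1)  # logits field is an assumption
        print([idx_to_answer[p.item()] for p in preds])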

beginner_source/introyt/introyt1_tutorial.py

Lines changed: 2 additions & 2 deletions

@@ -369,7 +369,7 @@ def imshow(img):

 # get some random training images
 dataiter = iter(trainloader)
-images, labels = dataiter.next()
+images, labels = next(dataiter)

 # show images
 imshow(torchvision.utils.make_grid(images))

@@ -446,7 +446,7 @@ def imshow(img):

 # get some random training images
 dataiter = iter(trainloader)
-images, labels = dataiter.next()
+images, labels = next(dataiter)

 # show images
 imshow(torchvision.utils.make_grid(images))

beginner_source/introyt/tensorboardyt_tutorial.py

Lines changed: 2 additions & 2 deletions

@@ -115,7 +115,7 @@ def matplotlib_imshow(img, one_channel=False):

 # Extract a batch of 4 images
 dataiter = iter(training_loader)
-images, labels = dataiter.next()
+images, labels = next(dataiter)

 # Create a grid from the images and show them
 img_grid = torchvision.utils.make_grid(images)

@@ -242,7 +242,7 @@ def forward(self, x):

 # Again, grab a single mini-batch of images
 dataiter = iter(training_loader)
-images, labels = dataiter.next()
+images, labels = next(dataiter)

 # add_graph() will trace the sample input through your model,
 # and render it as a graph.

beginner_source/introyt/trainingyt.py

Lines changed: 1 addition & 1 deletion

@@ -112,7 +112,7 @@ def matplotlib_imshow(img, one_channel=False):
     plt.imshow(np.transpose(npimg, (1, 2, 0)))

 dataiter = iter(training_loader)
-images, labels = dataiter.next()
+images, labels = next(dataiter)

 # Create a grid from the images and show them
 img_grid = torchvision.utils.make_grid(images)
