From 242f4207d084dff40af106119587d9241c0de16a Mon Sep 17 00:00:00 2001
From: Ankita De
Date: Sat, 24 Sep 2022 10:47:53 -0700
Subject: [PATCH 01/23] [WIP] Add torchmultimodal tutorial for flava finetuning

---
 Makefile                                     |   4 +
 beginner_source/flava_finetuning_tutorial.py | 173 +++++++++++++++++++
 index.rst                                    |  17 ++
 requirements.txt                             |   3 +
 4 files changed, 197 insertions(+)
 create mode 100644 beginner_source/flava_finetuning_tutorial.py

diff --git a/Makefile b/Makefile
index 8c21384967c..a01ea69bb50 100644
--- a/Makefile
+++ b/Makefile
@@ -102,6 +102,10 @@ download:
 	wget -nv -N https://download.pytorch.org/models/resnet18-5c106cde.pth -P $(DATADIR)
 	cp $(DATADIR)/resnet18-5c106cde.pth prototype_source/data/resnet18_pretrained_float.pth
 
+	# Download vocab for beginner_source/flava_finetuning_tutorial.py
+	wget -nv -N http://dl.fbaipublicfiles.com/pythia/data/vocab.tar.gz -P $(DATADIR)
+	tar $(TAROPTS) -xzf $(DATADIR)/vocab.tar.gz -C ./beginner_source/data/
+
 docs:
 	make download
 
diff --git a/beginner_source/flava_finetuning_tutorial.py b/beginner_source/flava_finetuning_tutorial.py
new file mode 100644
index 00000000000..0a067043e6d
--- /dev/null
+++ b/beginner_source/flava_finetuning_tutorial.py
@@ -0,0 +1,173 @@
+# -*- coding: utf-8 -*-
+"""
+TorchMultimodal Tutorial : Finetuning FLAVA
+=======================
+"""
+
+######################################################################
+# Multimodal AI has recently become very popular owing to its ubiquitous
+# nature, from use cases like image captioning and visual search to more
+# recent applications like image generation from text. **TorchMultimodal
+# is a library powered by PyTorch consisting of building blocks and end-to-end
+# examples, aiming to enable and accelerate research in
+# multimodality**.
+#
+# In this tutorial, we will demonstrate how to use a **pretrained SoTA
+# model called** `FLAVA `__ **from
+# TorchMultimodal library to finetune on a multimodal task i.e. visual
+# question answering** (VQA).
+#
+
+
+######################################################################
+# Installations
+#
+# We will use TextVQA dataset from HuggingFace for this
+# tutorial. So we install datasets in addition to TorchMultimodal
+#
+
+# TODO: replace with install from pip when binary is ready
+!git clone https://github.com/facebookresearch/multimodal.git
+!pip install -r multimodal/requirements.txt
+import os
+import sys
+sys.path.append(os.path.join(os.getcwd(),"multimodal"))
+sys.path.append(os.getcwd())
+!pip install datasets
+!pip install transformers
+
+
+######################################################################
+# For this tutorial, we treat VQA as a classification task. So we need to
+# download the vocab file with answer classes and create the answer to
+# label mapping.
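+#
+# As a sketch of what we are about to build (assuming one answer per line in
+# the vocab file), the mapping is a plain ``{answer_string: class_index}``
+# dictionary; an equivalent one-liner would be:
+#
+# .. code-block:: python
+#
+#    answer_to_idx = {ans.strip("\n"): i for i, ans in enumerate(vocab)}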
+#
+# We also load the `textvqa
+# dataset `__ from HuggingFace
+#
+
+!wget http://dl.fbaipublicfiles.com/pythia/data/vocab.tar.gz
+!tar xf vocab.tar.gz
+
+
+with open("vocabs/answers_textvqa_more_than_1.txt") as f:
+    vocab = f.readlines()
+
+answer_to_idx = {}
+for idx, entry in enumerate(vocab):
+    answer_to_idx[entry.strip("\n")] = idx
+
+
+######################################################################
+# We see there are 3997 answer classes including a class representing
+# unknown answers
+#
+
+print(len(vocab))
+print(vocab[:5])
+
+from datasets import load_dataset
+dataset = load_dataset("textvqa")
+
+from IPython.display import display, Image
+idx = 5
+print("Question: ", dataset["train"][idx]["question"])
+print("Answers: " ,dataset["train"][idx]["answers"])
+display(dataset["train"][idx]["image"].resize((500,500)))
+
+
+######################################################################
+# Next we write the transform function to convert the image and text into
+# Tensors consumable by our model - For images, we use the transforms from
+# torchvision to convert to Tensor and resize to uniform sizes - For text,
+# we tokenize (and pad) them using the BertTokenizer from HuggingFace -
+# For answers (i.e. labels), we take the most frequently occurring answer
+# as the label to train with
+#
+
+import torch
+from torchvision import transforms
+from collections import defaultdict
+from transformers import BertTokenizer
+from functools import partial
+
+def transform(tokenizer, input):
+    batch = {}
+    image_transform = transforms.Compose([transforms.ToTensor(), transforms.Resize([224,224])])
+    image = image_transform(input["image"][0].convert("RGB"))
+    batch["image"] = [image]
+
+    tokenized=tokenizer(input["question"],return_tensors='pt',padding="max_length",max_length=512)
+    batch.update(tokenized)
+
+
+    ans_to_count = defaultdict(int)
+    for ans in input["answers"][0]:
+        ans_to_count[ans] += 1
+    max_value = max(ans_to_count, key=ans_to_count.get)
+    ans_idx = answer_to_idx.get(max_value,0)
+    batch["answers"] = torch.as_tensor([ans_idx])
+
+    return batch
+
+tokenizer=BertTokenizer.from_pretrained("bert-base-uncased",padding="max_length",max_length=512)
+transform=partial(transform,tokenizer)
+dataset.set_transform(transform)
+
+
+######################################################################
+# Finally, we import the flava_model_for_classification from
+# torchmultimodal. It loads the pretrained flava checkpoint by default and
+# includes a classification head.
+#
+# The model forward function passes the image through the visual encoder
+# and the question through the text encoder. The image and question
+# embeddings are then passed through the multimodal encoder. The final
+# embedding corresponding to the CLS token is passed through an MLP head
+# which finally gives the probability distribution over each possible
+# answer.
+#
+
+from torchmultimodal.models.flava.model import flava_model_for_classification
+model = flava_model_for_classification(num_classes=len(vocab))
+
+
+######################################################################
+# We put together the dataset and model in a toy training loop to
+# demonstrate how to train the model for 3 iterations.
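+#
+# As a quick sanity check first (a sketch added here; it assumes the
+# classification output also exposes per-class ``logits`` alongside the
+# ``loss`` used below), you can run a single forward pass on one batch:
+#
+# .. code-block:: python
+#
+#    from torch.utils.data import DataLoader
+#
+#    batch = next(iter(DataLoader(dataset["train"], batch_size=2)))
+#    out = model(text=batch["input_ids"], image=batch["image"], labels=batch["answers"], required_embedding="mm")
+#    print(out.loss)          # scalar loss for the batch
+#    print(out.logits.shape)  # (batch_size, num_classes), i.e. (2, 3997)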
+#
+
+from torch import nn
+BATCH_SIZE = 2
+MAX_STEPS = 3
+from torch.utils.data import DataLoader
+
+train_dataloader = DataLoader(dataset["train"], batch_size= BATCH_SIZE)
+optimizer = torch.optim.AdamW(model.parameters())
+
+
+epochs = 1
+for _ in range(epochs):
+    for idx, batch in enumerate(train_dataloader):
+        optimizer.zero_grad()
+        out = model(text = batch["input_ids"], image = batch["image"], labels = batch["answers"], required_embedding="mm")
+        loss = out.loss
+        loss.backward()
+        optimizer.step()
+        print(f"Loss at step {idx} = {loss}")
+        if idx >= MAX_STEPS-1:
+            break
+
+
+######################################################################
+# Conclusion
+#
+# This tutorial introduced the basics around how to finetune on a
+# multimodal task using FLAVA from TorchMultimodal. Please also check out
+# other examples from the library like
+# `MDETR `__
+# which is a multimodal model for object detection and
+# `Omnivore `__
+# which is a multitask model spanning image, video and 3d classification.
+#
+

diff --git a/index.rst b/index.rst
index 89f04219d87..5bb20a34825 100644
--- a/index.rst
+++ b/index.rst
@@ -646,6 +646,15 @@ What's new in PyTorch tutorials?
    :link: advanced/sharding.html
    :tags: TorchRec,Recommender
 
+.. Multimodality
+
+.. customcarditem::
+   :header: Introduction to TorchMultimodal
+   :card_description: TorchMultimodal is a library that provides models, primitives and examples for training multimodal tasks
+   :image: _static/img/thumbnails/torchrec.png
+   :link: beginner/flava_finetuning_tutorial.html
+   :tags: TorchMultimodal
+
 .. End of tutorial card section
 
@@ -919,3 +928,11 @@ Additional Resources
 
    intermediate/torchrec_tutorial
    advanced/sharding
+
+.. toctree::
+   :maxdepth: 2
+   :includehidden:
+   :hidden:
+   :caption: Multimodality
+
+   beginner/flava_finetuning_tutorial

diff --git a/requirements.txt b/requirements.txt
index dd632f368a4..cd621afed32 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,6 +27,9 @@ pytorch-lightning
 torchx
 ax-platform
 nbformat>=4.2.0
+datasets
+transformers
+torchmultimodal-nightly
 
 # PyTorch Theme
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme

From d991b9b1a3f003a40ad855ff40f43d8c0010ca7f Mon Sep 17 00:00:00 2001
From: Ankita De
Date: Mon, 26 Sep 2022 11:01:06 -0700
Subject: [PATCH 02/23] [WIP] Add torchmultimodal tutorial for flava finetuning

ghstack-source-id: e04328489b09a4f53d731b7aebcb424881567531
Pull Request resolved: https://github.com/pytorch/tutorials/pull/2055
---
 Makefile                                     |   4 +
 beginner_source/flava_finetuning_tutorial.py | 172 +++++++++++++++++++
 index.rst                                    |  17 ++
 requirements.txt                             |   3 +
 4 files changed, 196 insertions(+)
 create mode 100644 beginner_source/flava_finetuning_tutorial.py

diff --git a/Makefile b/Makefile
index 8c21384967c..a01ea69bb50 100644
--- a/Makefile
+++ b/Makefile
@@ -102,6 +102,10 @@ download:
 	wget -nv -N https://download.pytorch.org/models/resnet18-5c106cde.pth -P $(DATADIR)
 	cp $(DATADIR)/resnet18-5c106cde.pth prototype_source/data/resnet18_pretrained_float.pth
 
+	# Download vocab for beginner_source/flava_finetuning_tutorial.py
+	wget -nv -N http://dl.fbaipublicfiles.com/pythia/data/vocab.tar.gz -P $(DATADIR)
+	tar $(TAROPTS) -xzf $(DATADIR)/vocab.tar.gz -C ./beginner_source/data/
+
 docs:
 	make download
 
diff --git a/beginner_source/flava_finetuning_tutorial.py b/beginner_source/flava_finetuning_tutorial.py
new file mode 100644
index 00000000000..a4f5d030a54
--- /dev/null
+++ b/beginner_source/flava_finetuning_tutorial.py
@@ -0,0 +1,172 @@
+######################################################################
+# TorchMultimodal Tutorial: FLAVA finetuning
+# --------------------------------------------
+#
+
+######################################################################
+# Multimodal AI has recently become very popular owing to its ubiquitous
+# nature, from use cases like image captioning and visual search to more
+# recent applications like image generation from text. **TorchMultimodal
+# is a library powered by PyTorch consisting of building blocks and end-to-end
+# examples, aiming to enable and accelerate research in
+# multimodality**.
+#
+# In this tutorial, we will demonstrate how to use a **pretrained SoTA
+# model called** `FLAVA `__ **from
+# TorchMultimodal library to finetune on a multimodal task i.e. visual
+# question answering** (VQA).
+#
+
+
+######################################################################
+# Installations
+#
+# We will use TextVQA dataset from HuggingFace for this
+# tutorial. So we install datasets in addition to TorchMultimodal
+#
+
+# TODO: replace with install from pip when binary is ready
+!git clone https://github.com/facebookresearch/multimodal.git
+!pip install -r multimodal/requirements.txt
+import os
+import sys
+sys.path.append(os.path.join(os.getcwd(),"multimodal"))
+sys.path.append(os.getcwd())
+!pip install datasets
+!pip install transformers
+
+
+######################################################################
+# For this tutorial, we treat VQA as a classification task. So we need to
+# download the vocab file with answer classes and create the answer to
+# label mapping.
+#
+# We also load the `textvqa
+# dataset `__ from HuggingFace
+#
+
+!wget http://dl.fbaipublicfiles.com/pythia/data/vocab.tar.gz
+!tar xf vocab.tar.gz
+
+
+with open("vocabs/answers_textvqa_more_than_1.txt") as f:
+    vocab = f.readlines()
+
+answer_to_idx = {}
+for idx, entry in enumerate(vocab):
+    answer_to_idx[entry.strip("\n")] = idx
+
+
+######################################################################
+# We see there are 3997 answer classes including a class representing
+# unknown answers
+#
+
+print(len(vocab))
+print(vocab[:5])
+
+from datasets import load_dataset
+dataset = load_dataset("textvqa")
+
+from IPython.display import display, Image
+idx = 5
+print("Question: ", dataset["train"][idx]["question"])
+print("Answers: " ,dataset["train"][idx]["answers"])
+display(dataset["train"][idx]["image"].resize((500,500)))
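+
+######################################################################
+# Each entry is a dictionary. As a quick check (a sketch added here), we can
+# confirm the fields the rest of the tutorial relies on are present:
+
+print(dataset["train"][idx].keys())  # expect at least 'image', 'question' and 'answers'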
+
+
+######################################################################
+# Next we write the transform function to convert the image and text into
+# Tensors consumable by our model - For images, we use the transforms from
+# torchvision to convert to Tensor and resize to uniform sizes - For text,
+# we tokenize (and pad) them using the BertTokenizer from HuggingFace -
+# For answers (i.e. labels), we take the most frequently occurring answer
+# as the label to train with
+#
+
+import torch
+from torchvision import transforms
+from collections import defaultdict
+from transformers import BertTokenizer
+from functools import partial
+
+def transform(tokenizer, input):
+    batch = {}
+    image_transform = transforms.Compose([transforms.ToTensor(), transforms.Resize([224,224])])
+    image = image_transform(input["image"][0].convert("RGB"))
+    batch["image"] = [image]
+
+    tokenized=tokenizer(input["question"],return_tensors='pt',padding="max_length",max_length=512)
+    batch.update(tokenized)
+
+
+    ans_to_count = defaultdict(int)
+    for ans in input["answers"][0]:
+        ans_to_count[ans] += 1
+    max_value = max(ans_to_count, key=ans_to_count.get)
+    ans_idx = answer_to_idx.get(max_value,0)
+    batch["answers"] = torch.as_tensor([ans_idx])
+
+    return batch
+
+tokenizer=BertTokenizer.from_pretrained("bert-base-uncased",padding="max_length",max_length=512)
+transform=partial(transform,tokenizer)
+dataset.set_transform(transform)
+
+
+######################################################################
+# Finally, we import the flava_model_for_classification from
+# torchmultimodal. It loads the pretrained flava checkpoint by default and
+# includes a classification head.
+#
+# The model forward function passes the image through the visual encoder
+# and the question through the text encoder. The image and question
+# embeddings are then passed through the multimodal encoder. The final
+# embedding corresponding to the CLS token is passed through an MLP head
+# which finally gives the probability distribution over each possible
+# answer.
+#
+
+from torchmultimodal.models.flava.model import flava_model_for_classification
+model = flava_model_for_classification(num_classes=len(vocab))
+
+
+######################################################################
+# We put together the dataset and model in a toy training loop to
+# demonstrate how to train the model for 3 iterations.
+#
+
+from torch import nn
+BATCH_SIZE = 2
+MAX_STEPS = 3
+from torch.utils.data import DataLoader
+
+train_dataloader = DataLoader(dataset["train"], batch_size= BATCH_SIZE)
+optimizer = torch.optim.AdamW(model.parameters())
+
+
+epochs = 1
+for _ in range(epochs):
+    for idx, batch in enumerate(train_dataloader):
+        optimizer.zero_grad()
+        out = model(text = batch["input_ids"], image = batch["image"], labels = batch["answers"], required_embedding="mm")
+        loss = out.loss
+        loss.backward()
+        optimizer.step()
+        print(f"Loss at step {idx} = {loss}")
+        if idx >= MAX_STEPS-1:
+            break
+
+
+######################################################################
+# Conclusion
+#
+# This tutorial introduced the basics around how to finetune on a
+# multimodal task using FLAVA from TorchMultimodal. Please also check out
+# other examples from the library like
+# `MDETR `__
+# which is a multimodal model for object detection and
+# `Omnivore `__
+# which is a multitask model spanning image, video and 3d classification.
+#

diff --git a/index.rst b/index.rst
index 89f04219d87..5bb20a34825 100644
--- a/index.rst
+++ b/index.rst
@@ -646,6 +646,15 @@ What's new in PyTorch tutorials?
    :link: advanced/sharding.html
    :tags: TorchRec,Recommender
 
+.. Multimodality
+
+.. 
customcarditem:: + :header: Introduction to TorchMultimodal + :card_description: TorchMultimodal is a library that provides models, primitives and examples for training multimodal tasks + :image: _static/img/thumbnails/torchrec.png + :link: beginner/flava_finetuning_tutorial.html + :tags: TorchMultimodal + .. End of tutorial card section @@ -919,3 +928,11 @@ Additional Resources intermediate/torchrec_tutorial advanced/sharding + +.. toctree:: + :maxdepth: 2 + :includehidden: + :hidden: + :caption: Multimodality + + beginner/flava_finetuning_tutorial diff --git a/requirements.txt b/requirements.txt index dd632f368a4..cd621afed32 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,6 +27,9 @@ pytorch-lightning torchx ax-platform nbformat>=4.2.0 +datasets +transformers +torchmultimodal-nightly # PyTorch Theme -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme From 3188b6644af25dbd65c646741c1db29faa0eb1dd Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 26 Sep 2022 12:48:32 -0700 Subject: [PATCH 03/23] Update --- beginner_source/flava_finetuning_tutorial.py | 57 ++++++++------------ requirements.txt | 4 ++ 2 files changed, 27 insertions(+), 34 deletions(-) diff --git a/beginner_source/flava_finetuning_tutorial.py b/beginner_source/flava_finetuning_tutorial.py index 0a067043e6d..ec87ba86f7d 100644 --- a/beginner_source/flava_finetuning_tutorial.py +++ b/beginner_source/flava_finetuning_tutorial.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ -TorchMultimodal Tutorial : Finetuning FLAVA -======================= +TorchMultimodal Tutorial: Finetuning FLAVA +============================================ """ ###################################################################### @@ -14,43 +14,33 @@ # # In this tutorial, we will demonstrate how to use a **pretrained SoTA # model called** `FLAVA `__ **from -# TorchMultimodal library to finetune on a multimodal task i.e. visual +# TorchMultimodal library to finetune on a multimodal task i.e. visual # question answering** (VQA). # ###################################################################### # Installations -# +# +# # We will use TextVQA dataset from HuggingFace for this -# tutorial. So we install datasets in addition to TorchMultimodal -# +# tutorial. So we install datasets in addition to TorchMultimodal. -# TODO: replace with install from pip when binary is ready -!git clone https://github.com/facebookresearch/multimodal.git -!pip install -r multimodal/requirements.txt import os -import sys +import sys sys.path.append(os.path.join(os.getcwd(),"multimodal")) sys.path.append(os.getcwd()) -!pip install datasets -!pip install transformers - ###################################################################### # For this tutorial, we treat VQA as a classification task. So we need to # download the vocab file with answer classes and create the answer to # label mapping. 
-# +# # We also load the `textvqa # dataset `__ from HuggingFace -# +# -!wget http://dl.fbaipublicfiles.com/pythia/data/vocab.tar.gz -!tar xf vocab.tar.gz - - -with open("vocabs/answers_textvqa_more_than_1.txt") as f: +with open("data/vocabs/answers_textvqa_more_than_1.txt") as f: vocab = f.readlines() answer_to_idx = {} @@ -61,7 +51,7 @@ ###################################################################### # We see there are 3997 answer classes including a class representing # unknown answers -# +# print(len(vocab)) print(vocab[:5]) @@ -81,9 +71,9 @@ # Tensors consumable by our model - For images, we use the transforms from # torchvision to convert to Tensor and resize to uniform sizes - For text, # we tokenize (and pad) them using the BertTokenizer from HuggingFace - -# For answers (i.e. labels), we take the most frequently occuring answer +# For answers (i.e. labels), we take the most frequently occuring answer # as the label to train with -# +# import torch from torchvision import transforms @@ -99,15 +89,15 @@ def transform(tokenizer, input): tokenized=tokenizer(input["question"],return_tensors='pt',padding="max_length",max_length=512) batch.update(tokenized) - - + + ans_to_count = defaultdict(int) for ans in input["answers"][0]: ans_to_count[ans] += 1 max_value = max(ans_to_count, key=ans_to_count.get) ans_idx = answer_to_idx.get(max_value,0) batch["answers"] = torch.as_tensor([ans_idx]) - + return batch tokenizer=BertTokenizer.from_pretrained("bert-base-uncased",padding="max_length",max_length=512) @@ -119,14 +109,14 @@ def transform(tokenizer, input): # Finally, we import the flava_model_for_classification from # torchmultimodal. It loads the pretrained flava checkpoint by default and # includes a classification head. -# +# # The model forward function passes the image through the visual encoder # and the question through the text encoder. The image and question # embeddings are then passed through the multimodal encoder. The final # embedding corresponding to the CLS token is passed through a MLP head # which finally gives the probability distribution over each possible # answers. -# +# from torchmultimodal.models.flava.model import flava_model_for_classification model = flava_model_for_classification(num_classes=len(vocab)) @@ -135,7 +125,7 @@ def transform(tokenizer, input): ###################################################################### # We put together the dataset and model in a toy training loop to # demonstrate how to train the model for 3 iterations. -# +# from torch import nn BATCH_SIZE = 2 @@ -144,8 +134,8 @@ def transform(tokenizer, input): train_dataloader = DataLoader(dataset["train"], batch_size= BATCH_SIZE) optimizer = torch.optim.AdamW(model.parameters()) - - + + epochs = 1 for _ in range(epochs): for idx, batch in enumerate(train_dataloader): @@ -161,7 +151,7 @@ def transform(tokenizer, input): ###################################################################### # Conclusion -# +# # This tutorial introduced the basics around how to finetune on a # multimodal task using FLAVA from TorchMultimodal. Please also check out # other examples from the library like @@ -169,5 +159,4 @@ def transform(tokenizer, input): # which is a multimodal model for object detection and # `Omnivore `__ # which is multitask model spanning image, video and 3d classification. 
-# - +# diff --git a/requirements.txt b/requirements.txt index 95a778aa2c0..cdbf101b477 100644 --- a/requirements.txt +++ b/requirements.txt @@ -48,3 +48,7 @@ wget gym==0.24.0 gym-super-mario-bros==7.3.0 timm + +# flava tutorial - multimodal +packaging +iopath From 7002ed559f90f352bf3c0cd67afef2b09ba4cdf9 Mon Sep 17 00:00:00 2001 From: Ankita De Date: Mon, 26 Sep 2022 22:36:29 -0700 Subject: [PATCH 04/23] Fix imports --- beginner_source/flava_finetuning_tutorial.py | 16 +++++++++------- requirements.txt | 2 -- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/beginner_source/flava_finetuning_tutorial.py b/beginner_source/flava_finetuning_tutorial.py index 8585184fc9a..b892bc0bfe9 100644 --- a/beginner_source/flava_finetuning_tutorial.py +++ b/beginner_source/flava_finetuning_tutorial.py @@ -22,14 +22,16 @@ ###################################################################### # Installations # +# We will use TextVQA dataset and bert tokenizer from HuggingFace for this +# tutorial. So you need to install datasets and transformers in addition to TorchMultimodal. +# When running this tutorial in Google Colab, install the required packages +# by uncommenting the following: # -# We will use TextVQA dataset from HuggingFace for this -# tutorial. So we install datasets in addition to TorchMultimodal. - -import os -import sys -sys.path.append(os.path.join(os.getcwd(),"multimodal")) -sys.path.append(os.getcwd()) +""" +!pip install torchmultimodal-nightly +!pip install datasets +!pip install transformers +""" ###################################################################### # For this tutorial, we treat VQA as a classification task. So we need to diff --git a/requirements.txt b/requirements.txt index cdbf101b477..7f972af3fa2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -49,6 +49,4 @@ gym==0.24.0 gym-super-mario-bros==7.3.0 timm -# flava tutorial - multimodal -packaging iopath From c33c3aa435575cba1b005b440cc466295f468dbe Mon Sep 17 00:00:00 2001 From: Ankita De Date: Mon, 3 Oct 2022 09:44:03 -0700 Subject: [PATCH 05/23] Address comments --- beginner_source/flava_finetuning_tutorial.py | 59 ++++++++++++-------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/beginner_source/flava_finetuning_tutorial.py b/beginner_source/flava_finetuning_tutorial.py index b892bc0bfe9..d0af50a7031 100644 --- a/beginner_source/flava_finetuning_tutorial.py +++ b/beginner_source/flava_finetuning_tutorial.py @@ -15,13 +15,15 @@ # In this tutorial, we will demonstrate how to use a **pretrained SoTA # model called** `FLAVA `__ **from # TorchMultimodal library to finetune on a multimodal task i.e. visual -# question answering** (VQA). -# +# question answering** (VQA). The model consists of two unimodal transformer +# based encoders for text and image and a multimodal encoder to combine +# the two embeddings. It is pretrained using contrastive, image text matching and +# text, image and multimodal masking losses. ###################################################################### -# Installations -# +# Installation +# ----------------- # We will use TextVQA dataset and bert tokenizer from HuggingFace for this # tutorial. So you need to install datasets and transformers in addition to TorchMultimodal. # When running this tutorial in Google Colab, install the required packages @@ -34,13 +36,25 @@ """ ###################################################################### -# For this tutorial, we treat VQA as a classification task. 
So we need to -# download the vocab file with answer classes and create the answer to +# Steps +# ----- +# 1. Download the HuggingFace dataset to a directory on your computer by running the following command: +# wget http://dl.fbaipublicfiles.com/pythia/data/vocab.tar.gz +# tar xf vocab.tar.gz +# If you are running this tutorial in Google Colab, run these commands +# in a new cell and prepend these commands with an exclamation mark (!) +# +# +# 2. For this tutorial, we treat VQA as a classification task where the inputs are images and question (text) and the output is an answer class. +# So we need to download the vocab file with answer classes and create the answer to # label mapping. # # We also load the `textvqa -# dataset `__ from HuggingFace +# dataset `__ containing 34602 training samples +# (images,questions and answers) from HuggingFace # +# We see there are 3997 answer classes including a class representing +# unknown answers. with open("data/vocabs/answers_textvqa_more_than_1.txt") as f: vocab = f.readlines() @@ -48,28 +62,27 @@ answer_to_idx = {} for idx, entry in enumerate(vocab): answer_to_idx[entry.strip("\n")] = idx - - -###################################################################### -# We see there are 3997 answer classes including a class representing -# unknown answers -# - print(len(vocab)) print(vocab[:5]) + from datasets import load_dataset dataset = load_dataset("textvqa") -from IPython.display import display, Image -idx = 5 -print("Question: ", dataset["train"][idx]["question"]) +###################################################################### +# Lets display a sample entry from the dataset + +import matplotlib.pyplot as plt +import numpy as np +idx = 5 +print("Question: ", dataset["train"][idx]["question"]) print("Answers: " ,dataset["train"][idx]["answers"]) -display(dataset["train"][idx]["image"].resize((500,500))) +im = np.asarray(dataset["train"][idx]["image"].resize((500,500))) +plt.imshow(im) plt.show() ###################################################################### -# Next we write the transform function to convert the image and text into +# 3. Next, we write the transform function to convert the image and text into # Tensors consumable by our model - For images, we use the transforms from # torchvision to convert to Tensor and resize to uniform sizes - For text, # we tokenize (and pad) them using the BertTokenizer from HuggingFace - @@ -107,7 +120,7 @@ def transform(tokenizer, input): ###################################################################### -# Finally, we import the flava_model_for_classification from +# 4. Finally, we import the flava_model_for_classification from # torchmultimodal. It loads the pretrained flava checkpoint by default and # includes a classification head. # @@ -124,7 +137,7 @@ def transform(tokenizer, input): ###################################################################### -# We put together the dataset and model in a toy training loop to +# 5. We put together the dataset and model in a toy training loop to # demonstrate how to train the model for 3 iterations. 
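+#
+# .. note::
+#
+#    The toy loop below runs on the CPU. As a sketch (an addition here,
+#    assuming a CUDA device is available), training can be moved to a GPU:
+#
+#    .. code-block:: python
+#
+#       device = "cuda" if torch.cuda.is_available() else "cpu"
+#       model.to(device)
+#       # and, inside the loop, move each tensor field of the batch:
+#       # batch = {k: v.to(device) if torch.is_tensor(v) else v for k, v in batch.items()}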
# @@ -141,7 +154,7 @@ def transform(tokenizer, input): for _ in range(epochs): for idx, batch in enumerate(train_dataloader): optimizer.zero_grad() - out = model(text = batch["input_ids"], image = batch["image"], labels = batch["answers"], required_embedding="mm") + out = model(text = batch["input_ids"], image = batch["image"], labels = batch["answers"]) loss = out.loss loss.backward() optimizer.step() @@ -152,7 +165,7 @@ def transform(tokenizer, input): ###################################################################### # Conclusion -# +# ------------------- # This tutorial introduced the basics around how to finetune on a # multimodal task using FLAVA from TorchMultimodal. Please also check out # other examples from the library like From 31d1ca4931eec6e7000894e5f97e201fbcd1106f Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 3 Oct 2022 12:40:54 -0700 Subject: [PATCH 06/23] Fix syntaxerror --- beginner_source/flava_finetuning_tutorial.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/beginner_source/flava_finetuning_tutorial.py b/beginner_source/flava_finetuning_tutorial.py index d0af50a7031..34d7b533123 100644 --- a/beginner_source/flava_finetuning_tutorial.py +++ b/beginner_source/flava_finetuning_tutorial.py @@ -26,14 +26,17 @@ # ----------------- # We will use TextVQA dataset and bert tokenizer from HuggingFace for this # tutorial. So you need to install datasets and transformers in addition to TorchMultimodal. -# When running this tutorial in Google Colab, install the required packages -# by uncommenting the following: # -""" -!pip install torchmultimodal-nightly -!pip install datasets -!pip install transformers -""" +# .. note:: +# +# When running this tutorial in Google Colab, install the required packages by +# creating a new cell and running the following commands: +# +# .. 
code-block:: +# +# !pip install torchmultimodal-nightly +# !pip install datasets +# !pip install transformers ###################################################################### # Steps From 6b6563a0b9f9f71bfa1f60e996cadea15575d04d Mon Sep 17 00:00:00 2001 From: Ankita De Date: Mon, 3 Oct 2022 15:32:21 -0700 Subject: [PATCH 07/23] Fix syntax --- beginner_source/flava_finetuning_tutorial.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/beginner_source/flava_finetuning_tutorial.py b/beginner_source/flava_finetuning_tutorial.py index 34d7b533123..5a42799ca3f 100644 --- a/beginner_source/flava_finetuning_tutorial.py +++ b/beginner_source/flava_finetuning_tutorial.py @@ -81,7 +81,8 @@ print("Question: ", dataset["train"][idx]["question"]) print("Answers: " ,dataset["train"][idx]["answers"]) im = np.asarray(dataset["train"][idx]["image"].resize((500,500))) -plt.imshow(im) plt.show() +plt.imshow(im) +plt.show() ###################################################################### From 0fe598ca4d3eeb877ac6710ab0b775fce56344e6 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 3 Oct 2022 16:17:28 -0700 Subject: [PATCH 08/23] Fix formatting --- beginner_source/flava_finetuning_tutorial.py | 38 ++++++++++++-------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/beginner_source/flava_finetuning_tutorial.py b/beginner_source/flava_finetuning_tutorial.py index 5a42799ca3f..92bc5031709 100644 --- a/beginner_source/flava_finetuning_tutorial.py +++ b/beginner_source/flava_finetuning_tutorial.py @@ -37,27 +37,36 @@ # !pip install torchmultimodal-nightly # !pip install datasets # !pip install transformers +# ###################################################################### # Steps # ----- +# # 1. Download the HuggingFace dataset to a directory on your computer by running the following command: -# wget http://dl.fbaipublicfiles.com/pythia/data/vocab.tar.gz -# tar xf vocab.tar.gz -# If you are running this tutorial in Google Colab, run these commands -# in a new cell and prepend these commands with an exclamation mark (!) +# +# .. code-block:: +# +# wget http://dl.fbaipublicfiles.com/pythia/data/vocab.tar.gz +# tar xf vocab.tar.gz +# +# .. note:: +# If you are running this tutorial in Google Colab, run these commands +# in a new cell and prepend these commands with an exclamation mark (!) # # -# 2. For this tutorial, we treat VQA as a classification task where the inputs are images and question (text) and the output is an answer class. -# So we need to download the vocab file with answer classes and create the answer to -# label mapping. +# 2. For this tutorial, we treat VQA as a classification task where +# the inputs are images and question (text) and the output is an answer class. +# So we need to download the vocab file with answer classes and create the answer to +# label mapping. # -# We also load the `textvqa -# dataset `__ containing 34602 training samples -# (images,questions and answers) from HuggingFace +# We also load the `textvqa +# dataset `__ containing 34602 training samples +# (images,questions and answers) from HuggingFace # # We see there are 3997 answer classes including a class representing # unknown answers. 
+# with open("data/vocabs/answers_textvqa_more_than_1.txt") as f: vocab = f.readlines() @@ -68,12 +77,12 @@ print(len(vocab)) print(vocab[:5]) - from datasets import load_dataset dataset = load_dataset("textvqa") ###################################################################### -# Lets display a sample entry from the dataset +# Lets display a sample entry from the dataset: +# import matplotlib.pyplot as plt import numpy as np @@ -91,7 +100,7 @@ # torchvision to convert to Tensor and resize to uniform sizes - For text, # we tokenize (and pad) them using the BertTokenizer from HuggingFace - # For answers (i.e. labels), we take the most frequently occuring answer -# as the label to train with +# as the label to train with: # import torch @@ -142,7 +151,7 @@ def transform(tokenizer, input): ###################################################################### # 5. We put together the dataset and model in a toy training loop to -# demonstrate how to train the model for 3 iterations. +# demonstrate how to train the model for 3 iterations: # from torch import nn @@ -170,6 +179,7 @@ def transform(tokenizer, input): ###################################################################### # Conclusion # ------------------- +# # This tutorial introduced the basics around how to finetune on a # multimodal task using FLAVA from TorchMultimodal. Please also check out # other examples from the library like From 720d370f2348d05c3376ff77bd3cba2eb0428640 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 10 Oct 2022 10:02:30 -0700 Subject: [PATCH 09/23] [DO NOT MERGE] 1.13 RC Test --- .jenkins/build.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 3aca5ba2a01..ac7c333bcdc 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -26,8 +26,11 @@ pip install -r $DIR/../requirements.txt # RC Link # pip uninstall -y torch torchvision torchaudio torchtext # pip install --pre --upgrade -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext -# pip uninstall -y torch torchvision torchaudio torchtext -# pip install -f https://download.pytorch.org/whl/test/cu111/torch_test.html torch torchvision torchaudio torchtext + +# Test enabled for PyTorch 1.13 RC Below +pip uninstall -y torch torchvision torchaudio torchtext +pip install --extra-index-url https://download.pytorch.org/whl/test/cu116 torch torchvision torchaudio torchtext +pip install --extra-index-url https://download.pytorch.org/whl/test torchdata # Install two language tokenizers for Translation with TorchText tutorial python -m spacy download en_core_web_sm From e67331d720d55444a09d97d22e74712a88f86a8b Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 11 Oct 2022 10:15:46 -0700 Subject: [PATCH 10/23] Update .jenkins/build.sh Co-authored-by: Nikita Shulga --- .jenkins/build.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index ac7c333bcdc..f314e2f9610 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -30,7 +30,6 @@ pip install -r $DIR/../requirements.txt # Test enabled for PyTorch 1.13 RC Below pip uninstall -y torch torchvision torchaudio torchtext pip install --extra-index-url https://download.pytorch.org/whl/test/cu116 torch torchvision torchaudio torchtext -pip install --extra-index-url https://download.pytorch.org/whl/test torchdata # Install two language tokenizers for Translation with TorchText tutorial python -m spacy download en_core_web_sm From 
38939c46282ed9b04a910a19bbee1b6f167b7511 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Thu, 13 Oct 2022 11:57:54 -0700 Subject: [PATCH 11/23] Update build.sh --- .jenkins/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index f314e2f9610..264d4bf6e42 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -29,7 +29,7 @@ pip install -r $DIR/../requirements.txt # Test enabled for PyTorch 1.13 RC Below pip uninstall -y torch torchvision torchaudio torchtext -pip install --extra-index-url https://download.pytorch.org/whl/test/cu116 torch torchvision torchaudio torchtext +pip3 install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/test/cu116/torch_test.html # Install two language tokenizers for Translation with TorchText tutorial python -m spacy download en_core_web_sm From c0d5fedbc47dc1f0ad708f8a8bf0569a4f76d040 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 14 Oct 2022 11:37:46 -0700 Subject: [PATCH 12/23] Update build.sh --- .jenkins/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 264d4bf6e42..15b0ff90b25 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -29,7 +29,7 @@ pip install -r $DIR/../requirements.txt # Test enabled for PyTorch 1.13 RC Below pip uninstall -y torch torchvision torchaudio torchtext -pip3 install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/test/cu116/torch_test.html +pip3 install --pre torch torchvision torchaudio torchtext -f https://download.pytorch.org/whl/test/cu116/torch_test.html # Install two language tokenizers for Translation with TorchText tutorial python -m spacy download en_core_web_sm From f509d8e1a3bf9a6de6554e17d8cd2cf359c76d8d Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 17 Oct 2022 13:33:09 -0700 Subject: [PATCH 13/23] Update build.sh --- .jenkins/build.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 15b0ff90b25..23cc9d4dbdc 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -29,7 +29,8 @@ pip install -r $DIR/../requirements.txt # Test enabled for PyTorch 1.13 RC Below pip uninstall -y torch torchvision torchaudio torchtext -pip3 install --pre torch torchvision torchaudio torchtext -f https://download.pytorch.org/whl/test/cu116/torch_test.html +pip3 install --pre torch torchvision torchaudio torchtext -f https://download.pytorch.org/whl/test torchdata https://download.pytorch.org/whl/test/cu116/torch_test.html +pip install --pre --extra-index-url https://download.pytorch.org/whl/test torchdata # Install two language tokenizers for Translation with TorchText tutorial python -m spacy download en_core_web_sm From d6e72e015df9b89e5ac31fd6de8710d899976b99 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 17 Oct 2022 13:42:54 -0700 Subject: [PATCH 14/23] Update build.sh --- .jenkins/build.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 23cc9d4dbdc..f3c82763e96 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -29,8 +29,7 @@ pip install -r $DIR/../requirements.txt # Test enabled for PyTorch 1.13 RC Below pip uninstall -y torch torchvision torchaudio torchtext -pip3 install --pre torch torchvision torchaudio torchtext -f https://download.pytorch.org/whl/test torchdata https://download.pytorch.org/whl/test/cu116/torch_test.html -pip install --pre --extra-index-url 
https://download.pytorch.org/whl/test torchdata +pip install --pre -f https://download.pytorch.org/whl/test torch torchvision torchaudio torchtext # Install two language tokenizers for Translation with TorchText tutorial python -m spacy download en_core_web_sm From 5fbf500bf90518fe8b881c26b683b85692c3d8e3 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 17 Oct 2022 13:51:19 -0700 Subject: [PATCH 15/23] Update build.sh --- .jenkins/build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index f3c82763e96..2ae72906c4b 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -25,11 +25,11 @@ pip install -r $DIR/../requirements.txt # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html # RC Link # pip uninstall -y torch torchvision torchaudio torchtext -# pip install --pre --upgrade -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext +# pip install --pre --upgrade -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext # Test enabled for PyTorch 1.13 RC Below pip uninstall -y torch torchvision torchaudio torchtext -pip install --pre -f https://download.pytorch.org/whl/test torch torchvision torchaudio torchtext +pip install --pre torch torchdata torchvision torchaudio torchtext -f https://download.pytorch.org/whl/test/cu116/torch_test.html # Install two language tokenizers for Translation with TorchText tutorial python -m spacy download en_core_web_sm From 3c7694f89a125621c7705cae2d4c99c25767286d Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 17 Oct 2022 15:00:37 -0700 Subject: [PATCH 16/23] Update build.sh --- .jenkins/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 2ae72906c4b..7019766f144 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -29,7 +29,7 @@ pip install -r $DIR/../requirements.txt # Test enabled for PyTorch 1.13 RC Below pip uninstall -y torch torchvision torchaudio torchtext -pip install --pre torch torchdata torchvision torchaudio torchtext -f https://download.pytorch.org/whl/test/cu116/torch_test.html +pip install --pre --upgrade -f https://download.pytorch.org/whl/test/cu116/torch_test.html torch torchdata torchvision torchaudio torchtext # Install two language tokenizers for Translation with TorchText tutorial python -m spacy download en_core_web_sm From 3559c44e1b35d16ca0e31daca1f27448f9eb70e1 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 17 Oct 2022 15:36:00 -0700 Subject: [PATCH 17/23] Remove functorch --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 74e2da5fad3..67bec81cd2b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,6 @@ torchvision torchtext torchaudio torchdata -functorch>=0.2.1 networkx PyHamcrest bs4 From 06b98742c32ffc60bf3967b266e30014c69d08e3 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Wed, 19 Oct 2022 13:28:00 -0700 Subject: [PATCH 18/23] Temporarily disabling fx_numeric_suite_tutorial --- .jenkins/build.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 7019766f144..fee713f588f 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -49,6 +49,8 @@ if [[ "${JOB_BASE_NAME}" == *worker_* ]]; then # python $DIR/remove_runnable_code.py 
intermediate_source/spatial_transformer_tutorial.py intermediate_source/spatial_transformer_tutorial.py || true # Temp remove for 1.10 release. # python $DIR/remove_runnable_code.py advanced_source/neural_style_tutorial.py advanced_source/neural_style_tutorial.py || true + # Temp remove for 1.13 release. + python $DIR/remove_runnable_code.py beginner_source/fx_numeric_suite_tutorial.py || true # TODO: Fix bugs in these tutorials to make them runnable again # python $DIR/remove_runnable_code.py beginner_source/audio_classifier_tutorial.py beginner_source/audio_classifier_tutorial.py || true From 3c0fc31990aa12094a70ca536c3131ff4343a70d Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Wed, 19 Oct 2022 14:19:51 -0700 Subject: [PATCH 19/23] Update build.sh --- .jenkins/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index fee713f588f..5263045a4bd 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -50,7 +50,7 @@ if [[ "${JOB_BASE_NAME}" == *worker_* ]]; then # Temp remove for 1.10 release. # python $DIR/remove_runnable_code.py advanced_source/neural_style_tutorial.py advanced_source/neural_style_tutorial.py || true # Temp remove for 1.13 release. - python $DIR/remove_runnable_code.py beginner_source/fx_numeric_suite_tutorial.py || true + python $DIR/remove_runnable_code.py beginner_source/fx_numeric_suite_tutorial.py beginner_source/fx_numeric_suite_tutorial.py || true # TODO: Fix bugs in these tutorials to make them runnable again # python $DIR/remove_runnable_code.py beginner_source/audio_classifier_tutorial.py beginner_source/audio_classifier_tutorial.py || true From a449a551d8b074766b689866297a73f9af782da3 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Thu, 20 Oct 2022 08:32:37 -0700 Subject: [PATCH 20/23] Disable in the validate list --- .jenkins/validate_tutorials_built.py | 1 + 1 file changed, 1 insertion(+) diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py index 92570124a4e..cc01326b44c 100644 --- a/.jenkins/validate_tutorials_built.py +++ b/.jenkins/validate_tutorials_built.py @@ -50,6 +50,7 @@ "recipes/Captum_Recipe", "hyperparameter_tuning_tutorial", "flask_rest_api_tutorial", + "fx_numeric_suite_tutorial", # remove when https://github.com/pytorch/tutorials/pull/2089 is fixed ] From 047a956b922a6485eff76d3ce46f319f235be173 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Thu, 20 Oct 2022 08:52:52 -0700 Subject: [PATCH 21/23] Disable ax tutorial --- .jenkins/validate_tutorials_built.py | 1 + 1 file changed, 1 insertion(+) diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py index cc01326b44c..5f9d563475f 100644 --- a/.jenkins/validate_tutorials_built.py +++ b/.jenkins/validate_tutorials_built.py @@ -51,6 +51,7 @@ "hyperparameter_tuning_tutorial", "flask_rest_api_tutorial", "fx_numeric_suite_tutorial", # remove when https://github.com/pytorch/tutorials/pull/2089 is fixed + "ax_multiobjective_nas_tutorial", ] From cff152efb0ebc51fb0decaa7254d1c365065d899 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Wed, 26 Oct 2022 15:00:59 -0700 Subject: [PATCH 22/23] rebase --- .jenkins/build.sh | 7 ------- .jenkins/validate_tutorials_built.py | 2 -- requirements.txt | 3 ++- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 5263045a4bd..edd2ffa5cb3 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -25,11 +25,6 @@ pip install -r $DIR/../requirements.txt # 
Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html # RC Link # pip uninstall -y torch torchvision torchaudio torchtext -# pip install --pre --upgrade -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext - -# Test enabled for PyTorch 1.13 RC Below -pip uninstall -y torch torchvision torchaudio torchtext -pip install --pre --upgrade -f https://download.pytorch.org/whl/test/cu116/torch_test.html torch torchdata torchvision torchaudio torchtext # Install two language tokenizers for Translation with TorchText tutorial python -m spacy download en_core_web_sm @@ -49,8 +44,6 @@ if [[ "${JOB_BASE_NAME}" == *worker_* ]]; then # python $DIR/remove_runnable_code.py intermediate_source/spatial_transformer_tutorial.py intermediate_source/spatial_transformer_tutorial.py || true # Temp remove for 1.10 release. # python $DIR/remove_runnable_code.py advanced_source/neural_style_tutorial.py advanced_source/neural_style_tutorial.py || true - # Temp remove for 1.13 release. - python $DIR/remove_runnable_code.py beginner_source/fx_numeric_suite_tutorial.py beginner_source/fx_numeric_suite_tutorial.py || true # TODO: Fix bugs in these tutorials to make them runnable again # python $DIR/remove_runnable_code.py beginner_source/audio_classifier_tutorial.py beginner_source/audio_classifier_tutorial.py || true diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py index 5f9d563475f..92570124a4e 100644 --- a/.jenkins/validate_tutorials_built.py +++ b/.jenkins/validate_tutorials_built.py @@ -50,8 +50,6 @@ "recipes/Captum_Recipe", "hyperparameter_tuning_tutorial", "flask_rest_api_tutorial", - "fx_numeric_suite_tutorial", # remove when https://github.com/pytorch/tutorials/pull/2089 is fixed - "ax_multiobjective_nas_tutorial", ] diff --git a/requirements.txt b/requirements.txt index 34d777076b7..28a37d88ac2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,7 @@ torchvision torchtext torchaudio torchdata +functorch>=0.2.1 networkx PyHamcrest bs4 @@ -28,7 +29,7 @@ ax-platform nbformat>=4.2.0 datasets transformers -torchmultimodal-nightly +torchmultimodal-nightly # needs to be updated to stable as soon as it's avaialable deep_phonemizer==0.0.17 # the following is necessary due to https://github.com/python/importlib_metadata/issues/411 From 1c114442e081b0e538a28fd03c6d076236a257c9 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Wed, 26 Oct 2022 15:02:32 -0700 Subject: [PATCH 23/23] Small fix --- .jenkins/build.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index edd2ffa5cb3..3aca5ba2a01 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -25,6 +25,9 @@ pip install -r $DIR/../requirements.txt # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html # RC Link # pip uninstall -y torch torchvision torchaudio torchtext +# pip install --pre --upgrade -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext +# pip uninstall -y torch torchvision torchaudio torchtext +# pip install -f https://download.pytorch.org/whl/test/cu111/torch_test.html torch torchvision torchaudio torchtext # Install two language tokenizers for Translation with TorchText tutorial python -m spacy download en_core_web_sm