From a800c7742426ad497ffd82718d2dfb485a818fae Mon Sep 17 00:00:00 2001 From: Raghu Krishnamoorthi Date: Wed, 4 Dec 2019 23:57:46 -0800 Subject: [PATCH 1/2] Fix formatting and clean up tutorial on quantized transfer learning Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: --- .../quantized_transfer_learning_tutorial.py | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/intermediate_source/quantized_transfer_learning_tutorial.py b/intermediate_source/quantized_transfer_learning_tutorial.py index 72a7be44252..9b878d470cb 100644 --- a/intermediate_source/quantized_transfer_learning_tutorial.py +++ b/intermediate_source/quantized_transfer_learning_tutorial.py @@ -84,9 +84,11 @@ ###################################################################### -# Load Data (section not needed as it is covered in the original tutorial) +# Load Data # ------------------------------------------------------------------------ # +# .. note:: This section is identical to the original transfer learning tutorial. +# # We will use ``torchvision`` and ``torch.utils.data`` packages to load # the data. # @@ -360,7 +362,7 @@ def visualize_model(model, rows=3, cols=3): # **Notice that when isolating the feature extractor from a quantized # model, you have to place the quantizer in the beginning and in the end # of it.** -# +# We write a helper function to create a model with a custom head. from torch import nn @@ -394,8 +396,6 @@ def create_combined_model(model_fe): ) return new_model -new_model = create_combined_model(model_fe) - ###################################################################### # .. warning:: Currently the quantized models can only be run on CPU.
@@ -404,6 +404,7 @@ def create_combined_model(model_fe): # import torch.optim as optim +new_model = create_combined_model(model_fe) new_model = new_model.to('cpu') criterion = nn.CrossEntropyLoss() @@ -431,7 +432,7 @@ def create_combined_model(model_fe): ###################################################################### -# **Part 2. Finetuning the quantizable model** +# Part 2. Finetuning the quantizable model # # In this part, we fine tune the feature extractor used for transfer # learning, and quantize the feature extractor. Note that in both part 1 @@ -446,18 +447,21 @@ def create_combined_model(model_fe): # datasets. # # The pretrained feature extractor must be quantizable, i.e we need to do -# the following: 1. Fuse (Conv, BN, ReLU), (Conv, BN) and (Conv, ReLU) -# using torch.quantization.fuse_modules. 2. Connect the feature extractor -# with a custom head. This requires dequantizing the output of the feature -# extractor. 3. Insert fake-quantization modules at appropriate locations -# in the feature extractor to mimic quantization during training. +# the following: +# 1. Fuse (Conv, BN, ReLU), (Conv, BN) and (Conv, ReLU) +# using torch.quantization.fuse_modules. +# 2. Connect the feature extractor +# with a custom head. This requires dequantizing the output of the feature +# extractor. +# 3. Insert fake-quantization modules at appropriate locations +# in the feature extractor to mimic quantization during training. # # For step (1), we use models from torchvision/models/quantization, which # support a member method fuse_model, which fuses all the conv, bn, and # relu modules. In general, this would require calling the # torch.quantization.fuse_modules API with the list of modules to fuse. # -# Step (2) is done by the function create_custom_model function that we +# Step (2) is done by the create_combined_model function that we # used in the previous section.
# # Step (3) is achieved by using torch.quantization.prepare_qat, which @@ -534,4 +538,3 @@ def create_combined_model(model_fe): plt.ioff() plt.tight_layout() plt.show() - From 09031269d6d6d0b4f1a024bade0e7118ba7705d9 Mon Sep 17 00:00:00 2001 From: Raghu Krishnamoorthi Date: Thu, 5 Dec 2019 17:46:24 -0800 Subject: [PATCH 2/2] Fix formatting issues with BERT tutorial to ensure hyperlinks are rendered correctly. --- .../dynamic_quantization_bert_tutorial.py | 50 +++++++++---------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/intermediate_source/dynamic_quantization_bert_tutorial.py b/intermediate_source/dynamic_quantization_bert_tutorial.py index 02ee84b36ea..0ef829e5c51 100644 --- a/intermediate_source/dynamic_quantization_bert_tutorial.py +++ b/intermediate_source/dynamic_quantization_bert_tutorial.py @@ -35,22 +35,20 @@ # are quantized dynamically (per batch) to int8 when the weights are # quantized to int8. # -# In PyTorch, we have ``torch.quantization.quantize_dynamic`` API support -# (https://pytorch.org/docs/stable/quantization.html#torch.quantization.quantize_dynamic), -# which replaces specified modules with dynamic weight-only quantized +# In PyTorch, we have `torch.quantization.quantize_dynamic API +# <https://pytorch.org/docs/stable/quantization.html#torch.quantization.quantize_dynamic>`_, +# which replaces specified modules with dynamic weight-only quantized # versions and output the quantized model. # # - We demonstrate the accuracy and inference performance results on the -# Microsoft Research Paraphrase Corpus (MRPC) task -# (https://www.microsoft.com/en-us/download/details.aspx?id=52398) in -# the General Language Understanding Evaluation benchmark (GLUE) -# (https://gluebenchmark.com/). The MRPC (Dolan and Brockett, 2005) is +# `Microsoft Research Paraphrase Corpus (MRPC) task <https://www.microsoft.com/en-us/download/details.aspx?id=52398>`_ +# in the General Language Understanding Evaluation benchmark `(GLUE) +# <https://gluebenchmark.com/>`_.
The MRPC (Dolan and Brockett, 2005) is # a corpus of sentence pairs automatically extracted from online news # sources, with human annotations of whether the sentences in the pair # are semantically equivalent. Because the classes are imbalanced (68% # positive, 32% negative), we follow common practice and report both -# accuracy and F1 score -# (https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html). +# accuracy and `F1 score <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html>`_. # MRPC is a common NLP task for language pair classification, as shown # below. # @@ -78,8 +76,10 @@ # # To start this tutorial, let’s first follow the installation instructions # in PyTorch and HuggingFace Github Repo: - -# https://github.com/pytorch/pytorch/#installation - -# https://github.com/huggingface/transformers#installation +# +# * https://github.com/pytorch/pytorch/#installation +# +# * https://github.com/huggingface/transformers#installation # # In addition, we also install ``sklearn`` package, as we will reuse its # built-in F1 score calculation helper function. @@ -93,8 +93,8 @@ ###################################################################### # Because we will be using the experimental parts of the PyTorch, it is # recommended to install the latest version of torch and torchvision. You -# can find the most recent instructions on local installation here -# https://pytorch.org/get-started/locally/. For example, to install on +# can find the most recent instructions on local installation `here +# <https://pytorch.org/get-started/locally/>`_. For example, to install on # Mac: # # ..
code:: shell @@ -149,10 +149,10 @@ # Download the dataset # -------------------- # -# Before running MRPC tasks we download the GLUE data -# (https://gluebenchmark.com/tasks) by running this script -# (https://gist.github.com/W4ngatang/60c2bdb54d156a41194446737ce03e2e, -# https://github.com/nyu-mll/GLUE-baselines/blob/master/download_glue_data.py) +# Before running MRPC tasks we download the `GLUE data +# <https://gluebenchmark.com/tasks>`_ by running this `script +# <https://gist.github.com/W4ngatang/60c2bdb54d156a41194446737ce03e2e>`_ followed by +# `download_glue_data <https://github.com/nyu-mll/GLUE-baselines/blob/master/download_glue_data.py>`_ # and unpack it to some directory “glue_data/MRPC”. # @@ -176,8 +176,7 @@ # Convert the texts into features # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # -# glue_convert_examples_to_features ( -# https://github.com/huggingface/transformers/blob/master/transformers/data/processors/glue.py) +# `glue_convert_examples_to_features <https://github.com/huggingface/transformers/blob/master/transformers/data/processors/glue.py>`_ # load a data file into a list of ``InputFeatures``. # # - Tokenize the input sequences; @@ -190,8 +189,7 @@ # F1 metric # ~~~~~~~~~ # -# The F1 score -# (https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html) +# The `F1 score <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html>`_ # can be interpreted as a weighted average of the precision and recall, # where an F1 score reaches its best value at 1 and worst score at 0. The # relative contribution of precision and recall to the F1 score are equal. @@ -217,7 +215,7 @@ # # To fine-tune the pre-trained BERT model (“bert-base-uncased” model in # HuggingFace transformers) for the MRPC task, you can follow the command -# in (https://github.com/huggingface/transformers/tree/master/examples): +# in `examples <https://github.com/huggingface/transformers/tree/master/examples>`_: # # :: # @@ -333,10 +331,8 @@ def set_seed(seed): # Define the tokenize and evaluation function # ------------------------------------------- # -# We reuse the tokenize and evaluation function from -# https://github.com/huggingface/transformers/blob/master/examples/run_glue.py. +# We reuse the tokenize and evaluation function from `huggingface <https://github.com/huggingface/transformers/blob/master/examples/run_glue.py>`_. # - # coding=utf-8 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. @@ -598,8 +594,8 @@ def time_model_evaluation(model, configs, tokenizer): # set multi-thread by ``torch.set_num_threads(N)`` (``N`` is the number of # intra-op parallelization threads). One preliminary requirement to enable # the intra-op parallelization support is to build PyTorch with the right -# backend such as OpenMP, Native, or TBB -# (https://pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html#build-options). +# `backend <https://pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html#build-options>`_ +# such as OpenMP, Native or TBB. # You can use ``torch.__config__.parallel_info()`` to check the # parallelization settings. On the same MacBook Pro using PyTorch with # Native backend for parallelization, we can get about 46 seconds for