diff --git a/intermediate_source/dynamic_quantization_bert_tutorial.py b/intermediate_source/dynamic_quantization_bert_tutorial.py
index 39de14549c3..aa10c9c513a 100644
--- a/intermediate_source/dynamic_quantization_bert_tutorial.py
+++ b/intermediate_source/dynamic_quantization_bert_tutorial.py
@@ -44,7 +44,7 @@
 # `_. The MRPC (Dolan and Brockett, 2005) is
 # a corpus of sentence pairs automatically extracted from online news
 # sources, with human annotations of whether the sentences in the pair
-# are semantically equivalent. Because the classes are imbalanced (68%
+# are semantically equivalent. As the classes are imbalanced (68%
 # positive, 32% negative), we follow the common practice and report
 # `F1 score `_.
 # MRPC is a common NLP task for language pair classification, as shown
@@ -55,10 +55,10 @@
 
 ######################################################################
 # 1. Setup
-# -------
+# --------
 #
-# Install PyTorch and HuggingFace Transformers
-# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+# 1.1 Install PyTorch and HuggingFace Transformers
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
 # To start this tutorial, let’s first follow the installation instructions
 # in PyTorch `here `_ and HuggingFace Github Repo `here `_.
@@ -87,8 +87,8 @@
 
 
 ######################################################################
-# 2. Import the necessary modules
-# ----------------------------
+# 1.2 Import the necessary modules
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
 # In this step we import the necessary Python modules for the tutorial.
 #
@@ -130,13 +130,13 @@
 
 
 ######################################################################
-# 3. Download the dataset
-# --------------------
+# 1.3 Download the dataset
+# ^^^^^^^^^^^^^^^^^^^^^^^^
 #
 # Before running MRPC tasks we download the `GLUE data
 # `_ by running `this script
 # `_
-# and unpack it to a directory `glue_data`.
+# and unpack it to a directory ``glue_data``.
 #
 #
 # .. code:: shell
@@ -146,8 +146,8 @@
 
 
 ######################################################################
-# 4. Helper functions
-# ----------------
+# 1.4 Learn about helper functions
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
 # The helper functions are built-in in transformers library. We mainly use
 # the following helper functions: one for converting the text examples
@@ -157,9 +157,9 @@
 # The `glue_convert_examples_to_features `_ function converts the texts into input features:
 #
 # - Tokenize the input sequences;
-# - Insert [CLS] in the beginning;
+# - Insert [CLS] at the beginning;
 # - Insert [SEP] between the first sentence and the second sentence, and
-#   in the end;
+#   at the end;
 # - Generate token type ids to indicate whether a token belongs to the
 #   first sequence or the second sequence.
 #
@@ -167,15 +167,15 @@
 # can be interpreted as a weighted average of the precision and recall,
 # where an F1 score reaches its best value at 1 and worst score at 0. The
 # relative contribution of precision and recall to the F1 score are equal.
-# The equation for the F1 score is:
 #
-# - F1 = 2 \* (precision \* recall) / (precision + recall)
+# - The equation for the F1 score is:
+# .. math:: F1 = 2 * (\text{precision} * \text{recall}) / (\text{precision} + \text{recall})
 #
 
 
 ######################################################################
-# 5. Fine-tune the BERT model
-# --------------------------
+# 2. Fine-tune the BERT model
+# ---------------------------
 #
@@ -216,8 +216,8 @@
 # To save time, you can download the model file (~400 MB) directly into your local folder ``$OUT_DIR``.
 
 
 ######################################################################
-# 6. Set global configurations
-# -------------------------
+# 2.1 Set global configurations
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
@@ -264,12 +264,9 @@ def set_seed(seed):
 
 
 ######################################################################
-# 7. Load the fine-tuned BERT model
-# ------------------------------
+# 2.2 Load the fine-tuned BERT model
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
-
-
-######################################################################
 # We load the tokenizer and fine-tuned BERT sequence classifier model
 # (FP32) from the ``configs.output_dir``.
 #
@@ -282,8 +279,8 @@ def set_seed(seed):
 
 
 ######################################################################
-# 8. Define the tokenize and evaluation function
-# -------------------------------------------
+# 2.3 Define the tokenize and evaluation function
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
 # We reuse the tokenize and evaluation function from `Huggingface `_.
 #
@@ -426,7 +423,7 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
 
 
 ######################################################################
-# 9. Apply the dynamic quantization
+# 3. Apply the dynamic quantization
 # ---------------------------------
 #
 # We call ``torch.quantization.quantize_dynamic`` on the model to apply
@@ -445,8 +442,8 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
 
 
 ######################################################################
-# 10. Check the model size
-# --------------------
+# 3.1 Check the model size
+# ^^^^^^^^^^^^^^^^^^^^^^^^
 #
 # Let’s first check the model size. We can observe a significant reduction
 # in model size (FP32 total size: 438 MB; INT8 total size: 181 MB):
@@ -472,8 +469,8 @@ def print_size_of_model(model):
 
 
 ######################################################################
-# 11. Evaluate the inference accuracy and time
-# ----------------------------------------
+# 3.2 Evaluate the inference accuracy and time
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
 # Next, let’s compare the inference time as well as the evaluation
 # accuracy between the original FP32 model and the INT8 model after the
@@ -513,7 +510,7 @@ def time_model_evaluation(model, configs, tokenizer):
 # comparison, in a `recent paper `_ (Table 1),
 # it achieved 0.8788 by
 # applying the post-training dynamic quantization and 0.8956 by applying
-# the quantization-aware training. The main reason is that we support the
+# the quantization-aware training. The main difference is that we support the
 # asymmetric quantization in PyTorch while that paper supports the
 # symmetric quantization only.
 #
@@ -533,8 +530,8 @@ def time_model_evaluation(model, configs, tokenizer):
 
 
 ######################################################################
-# 12. Serialize the quantized model
-# -----------------------------
+# 3.3 Serialize the quantized model
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
 # We can serialize and save the quantized model for the future use.
 #
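For reference on the hunks above touching sections 3 and 3.1: the dynamic quantization the tutorial applies is a single ``torch.quantization.quantize_dynamic`` call, and the ``print_size_of_model`` helper visible in the hunk context measures the serialized state dict. A minimal sketch of both steps, assuming a locally fine-tuned checkpoint; the ``./mrpc_output`` path is a placeholder, not part of this patch:

import os
import torch
from transformers import BertForSequenceClassification

# Load the fine-tuned FP32 model; "./mrpc_output" is a placeholder path.
model = BertForSequenceClassification.from_pretrained("./mrpc_output")
model.eval()

# Quantize the weights of every torch.nn.Linear module to INT8;
# activations are quantized on the fly at inference time.
quantized_model = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)

def print_size_of_model(model):
    # Serialize the state dict to a temporary file and report its size.
    torch.save(model.state_dict(), "temp.p")
    print("Size (MB):", os.path.getsize("temp.p") / 1e6)
    os.remove("temp.p")

print_size_of_model(model)            # FP32, ~438 MB per the tutorial
print_size_of_model(quantized_model)  # INT8, ~181 MB per the tutorial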
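The comparison in section 3.2 is wall-clock timing around an evaluation pass. As a self-contained stand-in for the tutorial's ``time_model_evaluation``/``evaluate`` helpers, the sketch below times inference on a single sentence pair rather than the full MRPC dev set; the sentences and iteration count are illustrative. It continues from the sketch above:

import time
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
inputs = tokenizer("The company acquired the startup.",
                   "The startup was bought by the company.",
                   return_tensors="pt")

def time_inference(model, inputs, iters=10):
    # Average wall-clock seconds per forward pass on CPU, no gradients.
    with torch.no_grad():
        start = time.time()
        for _ in range(iters):
            model(**inputs)
    return (time.time() - start) / iters

print("FP32 sec/iter:", time_inference(model, inputs))
print("INT8 sec/iter:", time_inference(quantized_model, inputs))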
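Finally, the serialization in section 3.3 goes through the state dict; the filename here is illustrative:

# Save the INT8 weights; restore later with load_state_dict() on a
# freshly quantized model of the same architecture.
torch.save(quantized_model.state_dict(), "bert_mrpc_int8_state_dict.pt")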