
Commit 592d369

Merge branch 'main' into improve-quantization-recipe
2 parents 610ff23 + 7e83c23

File tree

8 files changed (+49, -25 lines)


.jenkins/validate_tutorials_built.py

Lines changed: 0 additions & 1 deletion
@@ -28,7 +28,6 @@
     "intermediate_source/_torch_export_nightly_tutorial", # does not work on release
     "advanced_source/super_resolution_with_onnxruntime",
     "advanced_source/ddp_pipeline", # requires 4 gpus
-    "advanced_source/usb_semisup_learn", # in the current form takes 140+ minutes to build - can be enabled when the build time is reduced
     "prototype_source/fx_graph_mode_ptq_dynamic",
     "prototype_source/vmap_recipe",
     "prototype_source/torchscript_freezing",

README.md

Lines changed: 6 additions & 0 deletions
@@ -57,7 +57,13 @@ GALLERY_PATTERN="neural_style_transfer_tutorial.py" sphinx-build . _build
 
 The `GALLERY_PATTERN` variable respects regular expressions.
 
+
 ## About contributing to PyTorch Documentation and Tutorials
 * You can find information about contributing to PyTorch documentation in the
   PyTorch Repo [README.md](https://github.com/pytorch/pytorch/blob/master/README.md) file.
 * Additional information can be found in [PyTorch CONTRIBUTING.md](https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md).
+
+
+## License
+
+PyTorch Tutorials is BSD licensed, as found in the LICENSE file.
(binary file changed, 555 KB; preview not rendered)

advanced_source/usb_semisup_learn.py

Lines changed: 20 additions & 13 deletions
@@ -5,7 +5,7 @@
 **Author**: `Hao Chen <https://github.com/Hhhhhhao>`_
 
 Unified Semi-supervised learning Benchmark (USB) is a semi-supervised
-learning framework built upon PyTorch.
+learning (SSL) framework built upon PyTorch.
 Based on Datasets and Modules provided by PyTorch, USB becomes a flexible,
 modular, and easy-to-use framework for semi-supervised learning.
 It supports a variety of semi-supervised learning algorithms, including
@@ -17,7 +17,7 @@
 This tutorial will walk you through the basics of using the USB lighting
 package.
 Let's get started by training a ``FreeMatch``/``SoftMatch`` model on
-CIFAR-10 using pretrained ViT!
+CIFAR-10 using pretrained Vision Transformers (ViT)!
 And we will show it is easy to change the semi-supervised algorithm and train
 on imbalanced datasets.
 
@@ -64,6 +64,9 @@
 # Now, let's use USB to train ``FreeMatch`` and ``SoftMatch`` on CIFAR-10.
 # First, we need to install USB package ``semilearn`` and import necessary API
 # functions from USB.
+# If you are running this in Google Colab, install ``semilearn`` by running:
+# ``!pip install semilearn``.
+#
 # Below is a list of functions we will use from ``semilearn``:
 #
 # - ``get_dataset`` to load dataset, here we use CIFAR-10
@@ -77,6 +80,10 @@
 # - ``Trainer``: a Trainer class for training and evaluating the
 #   algorithm on dataset
 #
+# Note that a CUDA-enabled backend is required for training with the ``semilearn`` package.
+# See `Enabling CUDA in Google Colab <https://pytorch.org/tutorials/beginner/colab#using-cuda>`__ for instructions
+# on enabling CUDA in Google Colab.
+#
 import semilearn
 from semilearn import get_dataset, get_data_loader, get_net_builder, get_algorithm, get_config, Trainer
 
@@ -92,7 +99,7 @@
 
     # optimization configs
     'epoch': 1,
-    'num_train_iter': 4000,
+    'num_train_iter': 500,
     'num_eval_iter': 500,
     'num_log_iter': 50,
     'optim': 'AdamW',
@@ -141,16 +148,16 @@
 
 ######################################################################
 # We can start training the algorithms on CIFAR-10 with 40 labels now.
-# We train for 4000 iterations and evaluate every 500 iterations.
+# We train for 500 iterations and evaluate every 500 iterations.
 #
 trainer = Trainer(config, algorithm)
 trainer.fit(train_lb_loader, train_ulb_loader, eval_loader)
 
 
 ######################################################################
 # Finally, let's evaluate the trained model on the validation set.
-# After training 4000 iterations with ``FreeMatch`` on only 40 labels of
-# CIFAR-10, we obtain a classifier that achieves above 93 accuracy on the validation set.
+# After training 500 iterations with ``FreeMatch`` on only 40 labels of
+# CIFAR-10, we obtain a classifier that achieves around 87% accuracy on the validation set.
 trainer.evaluate(eval_loader)
 
 
@@ -174,7 +181,7 @@
 
     # optimization configs
     'epoch': 1,
-    'num_train_iter': 4000,
+    'num_train_iter': 500,
     'num_eval_iter': 500,
     'num_log_iter': 50,
     'optim': 'AdamW',
@@ -225,7 +232,7 @@
 
 ######################################################################
 # We can start Train the algorithms on CIFAR-10 with 40 labels now.
-# We train for 4000 iterations and evaluate every 500 iterations.
+# We train for 500 iterations and evaluate every 500 iterations.
 #
 trainer = Trainer(config, algorithm)
 trainer.fit(train_lb_loader, train_ulb_loader, eval_loader)
@@ -239,8 +246,8 @@
 
 
 ######################################################################
-# References
-# [1] USB: https://github.com/microsoft/Semi-supervised-learning
-# [2] Kihyuk Sohn et al. FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence
-# [3] Yidong Wang et al. FreeMatch: Self-adaptive Thresholding for Semi-supervised Learning
-# [4] Hao Chen et al. SoftMatch: Addressing the Quantity-Quality Trade-off in Semi-supervised Learning
+# References:
+# - [1] USB: https://github.com/microsoft/Semi-supervised-learning
+# - [2] Kihyuk Sohn et al. FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence
+# - [3] Yidong Wang et al. FreeMatch: Self-adaptive Thresholding for Semi-supervised Learning
+# - [4] Hao Chen et al. SoftMatch: Addressing the Quantity-Quality Trade-off in Semi-supervised Learning
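
Reviewer context (not part of the commit): the tutorial being re-enabled wires the ``semilearn`` functions named in the diff (``get_config``, ``get_net_builder``, ``get_algorithm``, ``get_dataset``, ``get_data_loader``, ``Trainer``) into a config -> algorithm -> loaders -> trainer pipeline. A minimal sketch of that workflow follows; the iteration counts match the values introduced by this commit, while all other config keys (algorithm, network, batch sizes, uratio) are illustrative assumptions and not guaranteed to be a complete config.

# Sketch of the semilearn workflow used by usb_semisup_learn.py (assumed values noted inline).
from semilearn import (get_dataset, get_data_loader, get_net_builder,
                       get_algorithm, get_config, Trainer)

config = get_config({
    'algorithm': 'freematch',        # assumed; 'softmatch' is used the same way
    'net': 'vit_tiny_patch2_32',     # assumed pretrained ViT backbone name
    'dataset': 'cifar10',
    'num_labels': 40,
    'num_classes': 10,
    'batch_size': 16,                # assumed
    'eval_batch_size': 16,           # assumed
    'uratio': 2,                     # assumed unlabeled/labeled batch ratio

    # optimization configs (values from the diff above)
    'epoch': 1,
    'num_train_iter': 500,
    'num_eval_iter': 500,
    'num_log_iter': 50,
    'optim': 'AdamW',
    # the full tutorial config also selects a CUDA device (omitted here)
})

algorithm = get_algorithm(config, get_net_builder(config.net, from_name=True),
                          tb_log=None, logger=None)
dataset_dict = get_dataset(config, config.algorithm, config.dataset,
                           config.num_labels, config.num_classes, data_dir='./data')

train_lb_loader = get_data_loader(config, dataset_dict['train_lb'], config.batch_size)
train_ulb_loader = get_data_loader(config, dataset_dict['train_ulb'],
                                   int(config.batch_size * config.uratio))
eval_loader = get_data_loader(config, dataset_dict['eval'], config.eval_batch_size)

trainer = Trainer(config, algorithm)
trainer.fit(train_lb_loader, train_ulb_loader, eval_loader)  # 500 training iterations
trainer.evaluate(eval_loader)                                # report validation accuracy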

beginner_source/colab.rst

Lines changed: 8 additions & 0 deletions
@@ -93,3 +93,11 @@ Hopefully this example will give you a good starting point for running
 some of the more complex tutorials in Colab. As we evolve our use of
 Colab on the PyTorch tutorials site, we'll look at ways to make this
 easier for users.
+
+Enabling CUDA
+~~~~~~~~~~~~~~~~
+Some tutorials require a CUDA-enabled device (NVIDIA GPU), which involves
+changing the Runtime type prior to executing the tutorial.
+To change the Runtime in Google Colab, on the top drop-down menu select **Runtime**,
+then select **Change runtime type**. Under **Hardware accelerator**, select ``T4 GPU``,
+then click ``Save``.
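
The new section describes a manual UI step; one quick way to confirm from inside a notebook cell that the runtime switch took effect is the standard PyTorch check below (illustrative, not part of the commit).

import torch

# After switching the Colab runtime to a GPU, this should report an available CUDA device.
if torch.cuda.is_available():
    print("CUDA device:", torch.cuda.get_device_name(0))  # e.g. a Tesla T4 on the T4 runtime
else:
    print("No CUDA device visible; re-check Runtime > Change runtime type.")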

beginner_source/hyperparameter_tuning_tutorial.py

Lines changed: 11 additions & 7 deletions
@@ -48,10 +48,21 @@
 from torch.utils.data import random_split
 import torchvision
 import torchvision.transforms as transforms
+# sphinx_gallery_start_ignore
+# Fixes ``AttributeError: '_LoggingTee' object has no attribute 'fileno'``.
+# This is only needed to run with sphinx-build.
+import sys
+if not hasattr(sys.stdout, "encoding"):
+    sys.stdout.encoding = "latin1"
+    sys.stdout.fileno = lambda: 0
+# sphinx_gallery_end_ignore
 from ray import tune
 from ray.air import Checkpoint, session
 from ray.tune.schedulers import ASHAScheduler
 
+# TODO: Migrate to ray.train.Checkpoint and remove following line
+os.environ["RAY_AIR_NEW_PERSISTENCE_MODE"]="0"
+
 ######################################################################
 # Most of the imports are needed for building the PyTorch model. Only the last three
 # imports are for Ray Tune.
@@ -448,13 +459,6 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2):
 
 
 if __name__ == "__main__":
-    # sphinx_gallery_start_ignore
-    # Fixes ``AttributeError: '_LoggingTee' object has no attribute 'fileno'``.
-    # This is only needed to run with sphinx-build.
-    import sys
-
-    sys.stdout.fileno = lambda: False
-    # sphinx_gallery_end_ignore
     # You can change the number of GPUs per trial here:
    main(num_samples=10, max_num_epochs=10, gpus_per_trial=0)
 
recipes_source/distributed_device_mesh.rst

Lines changed: 3 additions & 3 deletions
@@ -14,7 +14,7 @@ Prerequisites:
 
 
 Setting up distributed communicators, i.e. NVIDIA Collective Communication Library (NCCL) communicators, for distributed training can pose a significant challenge. For workloads where users need to compose different parallelisms,
-users would need to manually set up and manage NCCL communicators (for example, :class:`ProcessGroup`) for each parallelism solutions. This process could be complicated and susceptible to errors.
+users would need to manually set up and manage NCCL communicators (for example, :class:`ProcessGroup`) for each parallelism solution. This process could be complicated and susceptible to errors.
 :class:`DeviceMesh` can simplify this process, making it more manageable and less prone to errors.
 
 What is DeviceMesh
@@ -30,7 +30,7 @@ Users can also easily manage the underlying process_groups/devices for multi-dim
 
 Why DeviceMesh is Useful
 ------------------------
-DeviceMesh is useful when working with multi-dimensional parallelism (i.e. 3-D parallel) where parallelism composability is requried. For example, when your parallelism solutions require both communication across hosts and within each host.
+DeviceMesh is useful when working with multi-dimensional parallelism (i.e. 3-D parallel) where parallelism composability is required. For example, when your parallelism solutions require both communication across hosts and within each host.
 The image above shows that we can create a 2D mesh that connects the devices within each host, and connects each device with its counterpart on the other hosts in a homogenous setup.
 
 Without DeviceMesh, users would need to manually set up NCCL communicators, cuda devices on each process before applying any parallelism, which could be quite complicated.
@@ -95,7 +95,7 @@ access the underlying :class:`ProcessGroup` if needed.
 from torch.distributed.device_mesh import init_device_mesh
 mesh_2d = init_device_mesh("cuda", (2, 4), mesh_dim_names=("replicate", "shard"))
 
-# Users can acess the undelying process group thru `get_group` API.
+# Users can access the underlying process group thru `get_group` API.
 replicate_group = mesh_2d.get_group(mesh_dim="replicate")
 shard_group = mesh_2d.get_group(mesh_dim="shard")
requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ bs4
 awscliv2==2.1.1
 flask
 spacy==3.4.1
-ray[tune]==2.4.0
+ray[tune]==2.7.2
 tensorboard
 jinja2==3.1.3
 pytorch-lightning
