
Commit 9514424

Merge branch 'main' into aws_graviton

2 parents 7081f4b + 219a9e3

83 files changed: +281 additions, -304 deletions


.devcontainer/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ ipython
 # to run examples
 pandas
 scikit-image
-pillow==10.0.1
+pillow==10.2.0
 wget
 
 # for codespaces env

_templates/layout.html

Lines changed: 0 additions & 10 deletions
@@ -112,14 +112,4 @@
 </script>
 
 <img height="1" width="1" style="border-style:none;" alt="" src="https://www.googleadservices.com/pagead/conversion/795629140/?label=txkmCPmdtosBENSssfsC&amp;guid=ON&amp;script=0"/>
-
-//temporarily add a link to survey
-<script>
-var survey = '<div class="survey-banner"><p><i class="fas fa-poll" aria-hidden="true">&nbsp </i> Take the annual <a href="https://forms.gle/jdNexNU6eZ8mCGDY7">PyTorch Tutorials survey</a>.</p></div>'
-if ($(".pytorch-call-to-action-links").length) {
-$(".pytorch-call-to-action-links").before(survey);
-} else {
-$("#pytorch-article").prepend(survey);
-}
-</script>
 {% endblock %}

advanced_source/ddp_pipeline.py

Lines changed: 1 addition & 1 deletion
@@ -439,7 +439,7 @@ def evaluate(eval_model, data_source):
 
 ######################################################################
 # Evaluate the model with the test dataset
-# -------------------------------------
+# ----------------------------------------
 #
 # Apply the best model to check the result with the test dataset.
 

advanced_source/dispatcher.rst

Lines changed: 1 addition & 1 deletion
@@ -129,7 +129,7 @@ for debugging in larger models where previously it can be hard to pin-point
 exactly where the ``requires_grad``-ness is lost during the forward pass.
 
 In-place or view ops
-^^^^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^
 
 To ensure correctness and best possible performance, if your op mutates an input
 in-place or returns a tensor that aliases with one of the inputs, two additional

advanced_source/neural_style_tutorial.py

Lines changed: 3 additions & 3 deletions
@@ -87,7 +87,7 @@
 # to 255 tensor images.
 #
 #
-# .. Note::
+# .. note::
 #    Here are links to download the images required to run the tutorial:
 #    `picasso.jpg <https://pytorch.org/tutorials/_static/img/neural-style/picasso.jpg>`__ and
 #    `dancing.jpg <https://pytorch.org/tutorials/_static/img/neural-style/dancing.jpg>`__.
@@ -183,7 +183,7 @@ def forward(self, input):
         return input
 
 ######################################################################
-# .. Note::
+# .. note::
 #    **Important detail**: although this module is named ``ContentLoss``, it
 #    is not a true PyTorch Loss function. If you want to define your content
 #    loss as a PyTorch Loss function, you have to create a PyTorch autograd function
@@ -372,7 +372,7 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
 input_img = content_img.clone()
 # if you want to use white noise by using the following code:
 #
-# ::
+# .. code-block:: python
 #
 #    input_img = torch.randn(content_img.data.size())
 

advanced_source/usb_semisup_learn.py

Lines changed: 1 addition & 1 deletion
@@ -157,7 +157,7 @@
 
 ######################################################################
 # Use USB to Train ``SoftMatch`` with specific imbalanced algorithm on imbalanced CIFAR-10
-# ------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------
 #
 # Now let's say we have imbalanced labeled set and unlabeled set of CIFAR-10,
 # and we want to train a ``SoftMatch`` model on it.

beginner_source/basics/autogradqs_tutorial.py

Lines changed: 2 additions & 2 deletions
@@ -10,7 +10,7 @@
 `Save & Load Model <saveloadrun_tutorial.html>`_
 
 Automatic Differentiation with ``torch.autograd``
-=======================================
+=================================================
 
 When training neural networks, the most frequently used algorithm is
 **back propagation**. In this algorithm, parameters (model weights) are
@@ -170,7 +170,7 @@
 
 ######################################################################
 # Optional Reading: Tensor Gradients and Jacobian Products
-# --------------------------------------
+# --------------------------------------------------------
 #
 # In many cases, we have a scalar loss function, and we need to compute
 # the gradient with respect to some parameters. However, there are cases
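The second hunk retitles the Jacobian-products section. For readers skimming the diff, a minimal sketch of the vector-Jacobian product that section covers, assuming only a working torch install (tensor names and shapes here are illustrative, not the tutorial's):

import torch

x = torch.randn(3, requires_grad=True)
y = 2 * x  # vector-valued output: y.backward() alone would fail

# For non-scalar outputs, pass a vector v to compute the
# vector-Jacobian product v^T @ J instead of a plain gradient.
v = torch.tensor([0.1, 1.0, 0.0001])
y.backward(gradient=v)

print(x.grad)  # equals 2 * v, since dy_i/dx_i = 2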

beginner_source/basics/buildmodel_tutorial.py

Lines changed: 2 additions & 2 deletions
@@ -10,7 +10,7 @@
 `Save & Load Model <saveloadrun_tutorial.html>`_
 
 Build the Neural Network
-===================
+========================
 
 Neural networks comprise of layers/modules that perform operations on data.
 The `torch.nn <https://pytorch.org/docs/stable/nn.html>`_ namespace provides all the building blocks you need to
@@ -197,5 +197,5 @@ def forward(self, x):
 
 #################################################################
 # Further Reading
-# --------------
+# -----------------
 # - `torch.nn API <https://pytorch.org/docs/stable/nn.html>`_
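Both hunks here only lengthen underlines; for context, the tutorial they touch builds a model by subclassing nn.Module. A minimal sketch of that pattern, with layer sizes that are illustrative assumptions rather than the tutorial's exact model:

import torch
from torch import nn

class TinyNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # a small stack of fully connected layers; sizes are arbitrary
        self.stack = nn.Sequential(
            nn.Linear(28 * 28, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
        )

    def forward(self, x):
        return self.stack(self.flatten(x))

model = TinyNetwork()
logits = model(torch.rand(1, 28, 28))  # one fake 28x28 "image"
print(logits.shape)  # torch.Size([1, 10])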

beginner_source/basics/data_tutorial.py

Lines changed: 7 additions & 7 deletions
@@ -10,7 +10,7 @@
 `Save & Load Model <saveloadrun_tutorial.html>`_
 
 Datasets & DataLoaders
-===================
+======================
 
 """
 
@@ -69,7 +69,7 @@
 
 #################################################################
 # Iterating and Visualizing the Dataset
-# -----------------
+# -------------------------------------
 #
 # We can index ``Datasets`` manually like a list: ``training_data[index]``.
 # We use ``matplotlib`` to visualize some samples in our training data.
@@ -144,7 +144,7 @@ def __getitem__(self, idx):
 
 
 #################################################################
-# __init__
+# ``__init__``
 # ^^^^^^^^^^^^^^^^^^^^
 #
 # The __init__ function is run once when instantiating the Dataset object. We initialize
@@ -167,7 +167,7 @@ def __init__(self, annotations_file, img_dir, transform=None, target_transform=N
 
 
 #################################################################
-# __len__
+# ``__len__``
 # ^^^^^^^^^^^^^^^^^^^^
 #
 # The __len__ function returns the number of samples in our dataset.
@@ -180,7 +180,7 @@ def __len__(self):
 
 
 #################################################################
-# __getitem__
+# ``__getitem__``
 # ^^^^^^^^^^^^^^^^^^^^
 #
 # The __getitem__ function loads and returns a sample from the dataset at the given index ``idx``.
@@ -220,7 +220,7 @@ def __getitem__(self, idx):
 
 ###########################
 # Iterate through the DataLoader
-# --------------------------
+# -------------------------------
 #
 # We have loaded that dataset into the ``DataLoader`` and can iterate through the dataset as needed.
 # Each iteration below returns a batch of ``train_features`` and ``train_labels`` (containing ``batch_size=64`` features and labels respectively).
@@ -243,5 +243,5 @@ def __getitem__(self, idx):
 
 #################################################################
 # Further Reading
-# --------------
+# ----------------
 # - `torch.utils.data API <https://pytorch.org/docs/stable/data.html>`_
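These hunks retitle the three methods every custom Dataset implements. A minimal sketch of that contract, using a hypothetical in-memory dataset rather than the tutorial's CSV-plus-images version:

import torch
from torch.utils.data import Dataset, DataLoader

class ToyDataset(Dataset):
    def __init__(self, n=256):
        # runs once at instantiation; here we fabricate features and labels
        self.features = torch.randn(n, 8)
        self.labels = torch.randint(0, 2, (n,))

    def __len__(self):
        # number of samples in the dataset
        return len(self.labels)

    def __getitem__(self, idx):
        # load and return the sample at the given index idx
        return self.features[idx], self.labels[idx]

# as in the "Iterate through the DataLoader" hunk, each iteration
# yields a batch of train_features and train_labels
loader = DataLoader(ToyDataset(), batch_size=64, shuffle=True)
train_features, train_labels = next(iter(loader))
print(train_features.shape, train_labels.shape)  # [64, 8] and [64]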

beginner_source/basics/intro.py

Lines changed: 2 additions & 2 deletions
@@ -31,15 +31,15 @@
 
 
 Running the Tutorial Code
-------------------
+-------------------------
 You can run this tutorial in a couple of ways:
 
 - **In the cloud**: This is the easiest way to get started! Each section has a "Run in Microsoft Learn" and "Run in Google Colab" link at the top, which opens an integrated notebook in Microsoft Learn or Google Colab, respectively, with the code in a fully-hosted environment.
 - **Locally**: This option requires you to setup PyTorch and TorchVision first on your local machine (`installation instructions <https://pytorch.org/get-started/locally/>`_). Download the notebook or copy the code into your favorite IDE.
 
 
 How to Use this Guide
------------------
+---------------------
 If you're familiar with other deep learning frameworks, check out the `0. Quickstart <quickstart_tutorial.html>`_ first
 to quickly familiarize yourself with PyTorch's API.
 

beginner_source/basics/tensorqs_tutorial.py

Lines changed: 2 additions & 2 deletions
@@ -80,7 +80,7 @@
 
 ######################################################################
 # Attributes of a Tensor
-# ~~~~~~~~~~~~~~~~~
+# ~~~~~~~~~~~~~~~~~~~~~~
 #
 # Tensor attributes describe their shape, datatype, and the device on which they are stored.
 
@@ -97,7 +97,7 @@
 
 ######################################################################
 # Operations on Tensors
-# ~~~~~~~~~~~~~~~~~
+# ~~~~~~~~~~~~~~~~~~~~~~~
 #
 # Over 100 tensor operations, including arithmetic, linear algebra, matrix manipulation (transposing,
 # indexing, slicing), sampling and more are
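The three attributes the first hunk's section describes fit in a few lines; a sketch assuming nothing beyond torch itself:

import torch

tensor = torch.rand(3, 4)
print(f"Shape of tensor: {tensor.shape}")              # torch.Size([3, 4])
print(f"Datatype of tensor: {tensor.dtype}")           # torch.float32
print(f"Device tensor is stored on: {tensor.device}")  # cpu, unless moved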

beginner_source/blitz/autograd_tutorial.py

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 """
 A Gentle Introduction to ``torch.autograd``
----------------------------------
+===========================================
 
 ``torch.autograd`` is PyTorch’s automatic differentiation engine that powers
 neural network training. In this section, you will get a conceptual
@@ -149,7 +149,7 @@
 
 ######################################################################
 # Optional Reading - Vector Calculus using ``autograd``
-# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
 # Mathematically, if you have a vector valued function
 # :math:`\vec{y}=f(\vec{x})`, then the gradient of :math:`\vec{y}` with

beginner_source/blitz/cifar10_tutorial.py

Lines changed: 1 addition & 1 deletion
@@ -115,7 +115,7 @@ def imshow(img):
 
 ########################################################################
 # 2. Define a Convolutional Neural Network
-# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 # Copy the neural network from the Neural Networks section before and modify it to
 # take 3-channel images (instead of 1-channel images as it was defined).
 

beginner_source/blitz/neural_networks_tutorial.py

Lines changed: 2 additions & 2 deletions
@@ -161,7 +161,7 @@ def forward(self, x):
 # ``.grad_fn`` attribute, you will see a graph of computations that looks
 # like this:
 #
-# ::
+# .. code-block:: sh
 #
 #    input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
 #    -> flatten -> linear -> relu -> linear -> relu -> linear
@@ -253,7 +253,7 @@ def forward(self, x):
 
 
 ###############################################################
-# .. Note::
+# .. note::
 #
 #    Observe how gradient buffers had to be manually set to zero using
 #    ``optimizer.zero_grad()``. This is because gradients are accumulated
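The note the second hunk touches is about manually zeroing gradient buffers. A minimal sketch of that pattern inside one training step, with a hypothetical model and batch (not the tutorial's network):

import torch
from torch import nn, optim

model = nn.Linear(10, 1)
optimizer = optim.SGD(model.parameters(), lr=0.01)
inputs, target = torch.randn(4, 10), torch.randn(4, 1)

optimizer.zero_grad()   # clear buffers; gradients otherwise accumulate
loss = nn.functional.mse_loss(model(inputs), target)
loss.backward()         # accumulate fresh gradients into .grad
optimizer.step()        # apply the update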

beginner_source/blitz/tensor_tutorial.py

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 """
 Tensors
---------------------------------------------
+========
 
 Tensors are a specialized data structure that are very similar to arrays
 and matrices. In PyTorch, we use tensors to encode the inputs and

beginner_source/data_loading_tutorial.py

Lines changed: 9 additions & 5 deletions
@@ -50,9 +50,9 @@
 # estimation <https://blog.dlib.net/2014/08/real-time-face-pose-estimation.html>`__
 # on a few images from imagenet tagged as 'face'.
 #
-# Dataset comes with a csv file with annotations which looks like this:
+# Dataset comes with a ``.csv`` file with annotations which looks like this:
 #
-# ::
+# .. code-block:: sh
 #
 #    image_name,part_0_x,part_0_y,part_1_x,part_1_y,part_2_x, ... ,part_67_x,part_67_y
 #    0805personali01.jpg,27,83,27,98, ... 84,134
@@ -196,7 +196,7 @@ def __getitem__(self, idx):
 # called. For this, we just need to implement ``__call__`` method and
 # if required, ``__init__`` method. We can then use a transform like this:
 #
-# ::
+# .. code-block:: python
 #
 #    tsfm = Transform(params)
 #    transformed_sample = tsfm(sample)
@@ -421,7 +421,9 @@ def show_landmarks_batch(sample_batched):
 # and dataloader. ``torchvision`` package provides some common datasets and
 # transforms. You might not even have to write custom classes. One of the
 # more generic datasets available in torchvision is ``ImageFolder``.
-# It assumes that images are organized in the following way: ::
+# It assumes that images are organized in the following way:
+#
+# .. code-block:: sh
 #
 #    root/ants/xxx.png
 #    root/ants/xxy.jpeg
@@ -435,7 +437,9 @@ def show_landmarks_batch(sample_batched):
 #
 # where 'ants', 'bees' etc. are class labels. Similarly generic transforms
 # which operate on ``PIL.Image`` like ``RandomHorizontalFlip``, ``Scale``,
-# are also available. You can use these to write a dataloader like this: ::
+# are also available. You can use these to write a dataloader like this:
+#
+# .. code-block:: pytorch
 #
 #    import torch
 #    from torchvision import transforms, datasets
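The last two hunks describe the ImageFolder layout and the dataloader built on it. A runnable sketch of that combination: the root path is a placeholder for any directory organized as root/<class>/<image>, and Resize stands in for the deprecated Scale the context mentions:

import torch
from torchvision import datasets, transforms

data_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # one of the generic PIL transforms
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
dataset = datasets.ImageFolder(root="path/to/root", transform=data_transform)
loader = torch.utils.data.DataLoader(dataset, batch_size=4,
                                     shuffle=True, num_workers=2)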

beginner_source/dcgan_faces_tutorial.py

Lines changed: 2 additions & 2 deletions
@@ -226,7 +226,7 @@
 # the ``celeba`` directory you just created. The resulting directory
 # structure should be:
 #
-# ::
+# .. code-block:: sh
 #
 #    /path/to/celeba
 #        -> img_align_celeba
@@ -265,7 +265,7 @@
 plt.axis("off")
 plt.title("Training Images")
 plt.imshow(np.transpose(vutils.make_grid(real_batch[0].to(device)[:64], padding=2, normalize=True).cpu(),(1,2,0)))
-
+plt.show()
 
 
 ######################################################################

beginner_source/ddp_series_fault_tolerance.rst

Lines changed: 0 additions & 4 deletions
@@ -93,11 +93,7 @@ In elastic training, whenever there are any membership changes (adding or removi
 on available devices. Having this structure ensures your training job can continue without manual intervention.
 
 
-
-
-
 Diff for `multigpu.py <https://github.com/pytorch/examples/blob/main/distributed/ddp-tutorial-series/multigpu.py>`__ v/s `multigpu_torchrun.py <https://github.com/pytorch/examples/blob/main/distributed/ddp-tutorial-series/multigpu_torchrun.py>`__
------------------------------------------------------------
 
 Process group initialization
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

beginner_source/ddp_series_multigpu.rst

Lines changed: 0 additions & 1 deletion
@@ -52,7 +52,6 @@ Along the way, we will talk through important concepts in distributed training w
 
 
 Diff for `single_gpu.py <https://github.com/pytorch/examples/blob/main/distributed/ddp-tutorial-series/single_gpu.py>`__ v/s `multigpu.py <https://github.com/pytorch/examples/blob/main/distributed/ddp-tutorial-series/multigpu.py>`__
-----------------------------------------------------
 
 These are the changes you typically make to a single-GPU training script to enable DDP.
 

beginner_source/dist_overview.rst

Lines changed: 17 additions & 2 deletions
@@ -74,7 +74,10 @@ common development trajectory would be:
 4. Use multi-machine `DistributedDataParallel <https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html>`__
    and the `launching script <https://github.com/pytorch/examples/blob/master/distributed/ddp/README.md>`__,
    if the application needs to scale across machine boundaries.
-5. Use `torch.distributed.elastic <https://pytorch.org/docs/stable/distributed.elastic.html>`__
+5. Use multi-GPU `FullyShardedDataParallel <https://pytorch.org/docs/stable/fsdp.html>`__
+   training on a single-machine or multi-machine when the data and model cannot
+   fit on one GPU.
+6. Use `torch.distributed.elastic <https://pytorch.org/docs/stable/distributed.elastic.html>`__
    to launch distributed training if errors (e.g., out-of-memory) are expected or if
    resources can join and leave dynamically during training.
 
@@ -134,6 +137,18 @@ DDP materials are listed below:
 5. The `Distributed Training with Uneven Inputs Using the Join Context Manager <../advanced/generic_join.html>`__
    tutorial walks through using the generic join context for distributed training with uneven inputs.
 
+
+``torch.distributed.FullyShardedDataParallel``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The `FullyShardedDataParallel <https://pytorch.org/docs/stable/fsdp.html>`__
+(FSDP) is a type of data parallelism paradigm which maintains a per-GPU copy of a model’s
+parameters, gradients and optimizer states, it shards all of these states across
+data-parallel workers. The support for FSDP was added starting PyTorch v1.11. The tutorial
+`Getting Started with FSDP <https://pytorch.org/tutorials/intermediate/FSDP_tutorial.html>`__
+provides in depth explanation and example of how FSDP works.
+
+
 torch.distributed.elastic
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -150,7 +165,7 @@ throws an exception, it is likely to lead to desynchronization (mismatched
 adds fault tolerance and the ability to make use of a dynamic pool of machines (elasticity).
 
 RPC-Based Distributed Training
-----------------------------
+------------------------------
 
 Many training paradigms do not fit into data parallelism, e.g.,
 parameter server paradigm, distributed pipeline parallelism, reinforcement
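The new FSDP subsection above is prose-only; a minimal wrapping sketch to go with it, assuming a CUDA machine and a launch via torchrun (which sets the environment variables init_process_group reads) — the linked FSDP tutorial covers real configurations:

import torch
import torch.distributed as dist
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP

dist.init_process_group("nccl")  # one process per GPU under torchrun
torch.cuda.set_device(dist.get_rank() % torch.cuda.device_count())

model = torch.nn.Linear(1024, 1024).cuda()  # placeholder model
model = FSDP(model)  # shards params, grads, and optimizer state across ranks
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # after wrapping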

beginner_source/hyperparameter_tuning_tutorial.py

Lines changed: 1 addition & 1 deletion
@@ -462,7 +462,7 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2):
 ######################################################################
 # If you run the code, an example output could look like this:
 #
-# ::
+# .. code-block:: sh
 #
 #    Number of trials: 10/10 (10 TERMINATED)
 #    +-----+--------------+------+------+-------------+--------+---------+------------+
