From 51f640eff56851cdd04016e3a2dc314c72827ef3 Mon Sep 17 00:00:00 2001
From: David Berard
Date: Fri, 11 Nov 2022 19:25:33 -0800
Subject: [PATCH 1/4] Update FSDP tutorial

* rename default_auto_wrap_policy -> size_based_auto_wrap_policy
* import functools
* indentation
---
 intermediate_source/FSDP_tutorial.rst | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/intermediate_source/FSDP_tutorial.rst b/intermediate_source/FSDP_tutorial.rst
index ecfc0dc4c7c..b2d74b2c202 100644
--- a/intermediate_source/FSDP_tutorial.rst
+++ b/intermediate_source/FSDP_tutorial.rst
@@ -67,6 +67,7 @@ We add the following code snippets to a python script “FSDP_mnist.py”.
     # Based on: https://github.com/pytorch/examples/blob/master/mnist/main.py
     import os
     import argparse
+    import functools
     import torch
     import torch.nn as nn
     import torch.nn.functional as F
@@ -82,14 +83,13 @@ We add the following code snippets to a python script “FSDP_mnist.py”.
     from torch.utils.data.distributed import DistributedSampler
     from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
     from torch.distributed.fsdp.fully_sharded_data_parallel import (
-        FullyShardedDataParallel as FSDP,
-        CPUOffload,
-        BackwardPrefetch,
+        CPUOffload,
+        BackwardPrefetch,
     )
     from torch.distributed.fsdp.wrap import (
-        default_auto_wrap_policy,
-        enable_wrap,
-        wrap,
+        size_based_auto_wrap_policy,
+        enable_wrap,
+        wrap,
     )
 
 1.3 Distributed training setup. As we mentioned FSDP is a type of data parallelism which requires a distributed training environment, so here we use two helper functions to initialize the processes for distributed training and clean up.
@@ -196,7 +196,7 @@ We add the following code snippets to a python script “FSDP_mnist.py”.
     transform=transforms.Compose([
         transforms.ToTensor(),
         transforms.Normalize((0.1307,), (0.3081,))
-        ])
+    ])
 
     dataset1 = datasets.MNIST('../data', train=True, download=True,
                         transform=transform)
@@ -217,7 +217,7 @@ We add the following code snippets to a python script “FSDP_mnist.py”.
     train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)
     test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
     my_auto_wrap_policy = functools.partial(
-            default_auto_wrap_policy, min_num_params=100
+            size_based_auto_wrap_policy, min_num_params=100
         )
     torch.cuda.set_device(rank)
@@ -248,9 +248,9 @@ We add the following code snippets to a python script “FSDP_mnist.py”.
     # use a barrier to make sure training is done on all ranks
     dist_barrier()
     # state_dict for FSDP model is only available on Nightlies for now
-        States = model.state_dict()
-        if rank == 0:
-            torch.save(states, "mnist_cnn.pt")
+    states = model.state_dict()
+    if rank == 0:
+        torch.save(states, "mnist_cnn.pt")
 
     cleanup()
@@ -343,7 +343,7 @@ Finding an optimal auto wrap policy is challenging, PyTorch will add auto tuning
 .. code-block:: python
 
     my_auto_wrap_policy = functools.partial(
-        default_auto_wrap_policy, min_num_params=20000
+        size_based_auto_wrap_policy, min_num_params=20000
     )
     torch.cuda.set_device(rank)
     model = Net().to(rank)

From 5b68901d7661ebd1adfeb9c20eebc54d25444091 Mon Sep 17 00:00:00 2001
From: David Berard
Date: Fri, 11 Nov 2022 19:41:13 -0800
Subject: [PATCH 2/4] another indentation fix

---
 intermediate_source/FSDP_tutorial.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/intermediate_source/FSDP_tutorial.rst b/intermediate_source/FSDP_tutorial.rst
index b2d74b2c202..7934c2b43e9 100644
--- a/intermediate_source/FSDP_tutorial.rst
+++ b/intermediate_source/FSDP_tutorial.rst
@@ -217,8 +217,8 @@ We add the following code snippets to a python script “FSDP_mnist.py”.
     train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)
     test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
     my_auto_wrap_policy = functools.partial(
-            size_based_auto_wrap_policy, min_num_params=100
-        )
+        size_based_auto_wrap_policy, min_num_params=100
+    )
     torch.cuda.set_device(rank)

From 49253dd6a5dd8cbbb2cf80d1a0e692e29555db9a Mon Sep 17 00:00:00 2001
From: David Berard
Date: Mon, 14 Nov 2022 09:36:54 -0800
Subject: [PATCH 3/4] Add note about default_auto_wrap_policy -> size_based_auto_wrap_policy

---
 intermediate_source/FSDP_tutorial.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/intermediate_source/FSDP_tutorial.rst b/intermediate_source/FSDP_tutorial.rst
index 7934c2b43e9..76c5374699f 100644
--- a/intermediate_source/FSDP_tutorial.rst
+++ b/intermediate_source/FSDP_tutorial.rst
@@ -62,6 +62,9 @@ We add the following code snippets to a python script “FSDP_mnist.py”.
 
 1.2 Import necessary packages
 
+.. note::
+   This tutorial is intended for pytorch versions 1.12 and later. If you are using 1.11 or earlier, replace all instances of `size_based_auto_wrap_policy` with `default_auto_wrap_policy`.
+
 .. code-block:: python
 
     # Based on: https://github.com/pytorch/examples/blob/master/mnist/main.py

From 14afc05d84af2d02097791b265f682b21193a8dc Mon Sep 17 00:00:00 2001
From: David Berard
Date: Mon, 14 Nov 2022 09:55:26 -0800
Subject: [PATCH 4/4] Use Svetlana's suggestion on wording for the note on size_based_auto_wrap_policy

Co-authored-by: Svetlana Karslioglu
---
 intermediate_source/FSDP_tutorial.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/intermediate_source/FSDP_tutorial.rst b/intermediate_source/FSDP_tutorial.rst
index 76c5374699f..d711637ae34 100644
--- a/intermediate_source/FSDP_tutorial.rst
+++ b/intermediate_source/FSDP_tutorial.rst
@@ -63,7 +63,7 @@ We add the following code snippets to a python script “FSDP_mnist.py”.
 1.2 Import necessary packages
 
 .. note::
-   This tutorial is intended for pytorch versions 1.12 and later. If you are using 1.11 or earlier, replace all instances of `size_based_auto_wrap_policy` with `default_auto_wrap_policy`.
+   This tutorial is intended for PyTorch versions 1.12 and later. If you are using an earlier version, replace all instances of `size_based_auto_wrap_policy` with `default_auto_wrap_policy`.
 
 .. code-block:: python
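
For readers skimming this series, the short sketch below (not part of the patches themselves) shows how the renamed `size_based_auto_wrap_policy` is typically bound with `functools.partial` and handed to the FSDP constructor, which is the pattern the updated tutorial follows. It is a minimal illustration under stated assumptions: the toy model, the single-process process-group setup, and the `auto_wrap_policy` keyword (the PyTorch 1.12+ spelling) are illustrative choices, and on PyTorch 1.11 or earlier the policy would be `default_auto_wrap_policy`, as the note added in patches 3 and 4 points out.

.. code-block:: python

    # Minimal sketch (illustration only, not part of the patch series):
    # wrap a toy model with FSDP using size_based_auto_wrap_policy.
    # Assumes PyTorch >= 1.12 and a single CUDA device (rank 0, world size 1).
    import os
    import functools

    import torch
    import torch.distributed as dist
    import torch.nn as nn
    from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
    from torch.distributed.fsdp.wrap import size_based_auto_wrap_policy


    def main():
        # Single-process stand-in for the tutorial's setup()/cleanup() helpers.
        os.environ.setdefault("MASTER_ADDR", "localhost")
        os.environ.setdefault("MASTER_PORT", "12355")
        dist.init_process_group("nccl", rank=0, world_size=1)
        torch.cuda.set_device(0)

        # Hypothetical toy model; any nn.Module works here.
        model = nn.Sequential(
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Linear(1024, 10),
        ).to(0)

        # Submodules with at least min_num_params parameters are placed in
        # their own FSDP unit; everything else stays in the root unit.
        my_auto_wrap_policy = functools.partial(
            size_based_auto_wrap_policy, min_num_params=100
        )
        model = FSDP(model, auto_wrap_policy=my_auto_wrap_policy)

        # Printing the module tree shows which submodules were auto-wrapped.
        print(model)

        dist.destroy_process_group()


    if __name__ == "__main__":
        main()

Running the sketch on one CUDA-capable GPU prints the wrapped module tree, which makes it easy to check which layers crossed the `min_num_params` threshold.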