
Commit 7a7862d

make_distribute_tutorial_work_in_google_colab
1 parent 748e52b commit 7a7862d

1 file changed: +8 -2 lines changed

intermediate_source/dist_tuto.rst

Lines changed: 8 additions & 2 deletions
@@ -47,6 +47,7 @@ the following template.
 """run.py:"""
 #!/usr/bin/env python
 import os
+import sys
 import torch
 import torch.distributed as dist
 import torch.multiprocessing as mp
@@ -66,7 +67,11 @@ the following template.
 if __name__ == "__main__":
     size = 2
     processes = []
-    mp.set_start_method("spawn")
+    if "google.colab" in sys.modules:
+        print("Running in Google Colab")
+        mp.get_context("spawn")
+    else:
+        mp.set_start_method("spawn")
     for rank in range(size):
         p = mp.Process(target=init_process, args=(rank, size, run))
         p.start()
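
For context, a minimal standalone sketch of the start-method selection this hunk introduces; the worker function is a placeholder assumption standing in for the tutorial's init_process/run pair, and the comment about why Colab needs the different path is an inference, not something stated in the commit:

import sys
import torch.multiprocessing as mp

def worker(rank, size):
    # Placeholder assumption: stands in for the tutorial's init_process(rank, size, run).
    print(f"worker {rank} of {size} started")

if __name__ == "__main__":
    size = 2
    processes = []
    if "google.colab" in sys.modules:
        # Assumption: the Colab kernel has already fixed a start method, so
        # mp.set_start_method("spawn") would raise RuntimeError here;
        # mp.get_context("spawn") only requests a spawn-based context and
        # leaves the global setting untouched.
        print("Running in Google Colab")
        mp.get_context("spawn")
    else:
        mp.set_start_method("spawn")
    for rank in range(size):
        p = mp.Process(target=worker, args=(rank, size))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

Checking "google.colab" in sys.modules also avoids importing the Colab package on machines where it is not installed.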
@@ -156,7 +161,8 @@ we should not modify the sent tensor nor access the received tensor before ``req
 In other words,

 - writing to ``tensor`` after ``dist.isend()`` will result in undefined behaviour.
-- reading from ``tensor`` after ``dist.irecv()`` will result in undefined behaviour.
+- reading from ``tensor`` after ``dist.irecv()`` will result in undefined behaviour,
+  until ``req.wait()`` has been executed.

 However, after ``req.wait()``
 has been executed we are guaranteed that the communication took place,
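
A minimal sketch of the rule these bullets describe, assuming the process group for two ranks has already been initialised (for example via the tutorial's init_process); the tensor shape is an illustrative assumption:

import torch
import torch.distributed as dist

def run(rank, size):
    """Non-blocking point-to-point exchange between ranks 0 and 1."""
    tensor = torch.zeros(1)
    if rank == 0:
        tensor += 1
        req = dist.isend(tensor=tensor, dst=1)  # returns immediately
        # Writing to `tensor` here would be undefined behaviour: the send
        # may still be reading from it.
    else:
        req = dist.irecv(tensor=tensor, src=0)  # returns immediately
        # Reading `tensor` here would be undefined behaviour: the data may
        # not have arrived yet.
    req.wait()
    # After wait() the transfer has completed and `tensor` is safe to use.
    print(f"Rank {rank} has data {tensor[0]}")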
