From cfefbb2677b016b8434f8f7413b44c4005d82bcc Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 9 Aug 2022 14:43:09 -0700 Subject: [PATCH 1/9] Add Distributed tutorials landing page --- _static/css/custom.css | 68 +++++++++++++++++++++++++++++++++++++ conf.py | 21 +++++++----- distributed/home.rst | 77 ++++++++++++++++++++++++++++++++++++++++++ index.rst | 1 + 4 files changed, 159 insertions(+), 8 deletions(-) create mode 100755 _static/css/custom.css create mode 100644 distributed/home.rst diff --git a/_static/css/custom.css b/_static/css/custom.css new file mode 100755 index 00000000000..11450d0fdaa --- /dev/null +++ b/_static/css/custom.css @@ -0,0 +1,68 @@ +/* sphinx-design styles for cards/tabs +*/ + +:root { + --sd-color-primary: #6c6c6d; + --sd-color-primary-highlight: #f3f4f7; + --sd-color-card-border-hover: #ee4c2c; + --sd-color-card-border: #f3f4f7; + --sd-color-card-background: #f3f4f7; + --sd-color-card-text: inherit; + --sd-color-card-header: transparent; + --sd-color-card-footer: transparent; + --sd-color-tabs-label-active: hsla(231, 99%, 66%, 1); + --sd-color-tabs-label-hover: hsla(231, 99%, 66%, 1); + --sd-color-tabs-label-inactive: hsl(0, 0%, 66%); + --sd-color-tabs-underline-active: hsla(231, 99%, 66%, 1); + --sd-color-tabs-underline-hover: rgba(178, 206, 245, 0.62); + --sd-color-tabs-underline-inactive: transparent; + --sd-color-tabs-overline: rgb(222, 222, 222); + --sd-color-tabs-underline: rgb(222, 222, 222); +} + +.sd-card { + position: relative; + background-color: #f3f4f7; + opacity: 0.5; + border-radius: 0px; + width: 30%; +} + +.sd-card-img { + opacity: 0.5; + width: 200px; + padding: 0px; +} + +.sd-card-img:hover { + opacity: 1.0; + background-color: #f3f4f7; +} + +.sd-card:after { + display:block; + opacity: 1; + content: ''; + background-color: #fff; + border: none; + border-bottom: solid 1px #ee4c2c; + transform: scaleX(0); + transition: transform .250s ease-in-out; +} + +.sd-card:hover { + background-color: #fff; + opacity: 1; + border: none; + border-bottom: solid 1px #ee4c2c; + transition: transform .250s ease-in-out; +} + +.sd-card:hover:after { + border: none; + transform: scaleX(1); +} + +.sd-card:after { + transform-origin: 0% 50%; +} diff --git a/conf.py b/conf.py index 41e3ee3e56c..a6fdbad6b2e 100644 --- a/conf.py +++ b/conf.py @@ -71,6 +71,7 @@ 'sphinx.ext.intersphinx', 'sphinx_copybutton', 'sphinx_gallery.gen_gallery', + 'sphinx_design' ] intersphinx_mapping = { @@ -263,22 +264,26 @@ 'Miscellaneous'), ] +html_css_files = [ + 'https://cdn.jsdelivr.net/npm/katex@0.10.0-beta/dist/katex.min.css', + 'css/custom.css' + ] def setup(app): # NOTE: in Sphinx 1.8+ `html_css_files` is an official configuration value # and can be moved outside of this function (and the setup(app) function # can be deleted). - html_css_files = [ - 'https://cdn.jsdelivr.net/npm/katex@0.10.0-beta/dist/katex.min.css' - ] + #html_css_files = [ + # 'https://cdn.jsdelivr.net/npm/katex@0.10.0-beta/dist/katex.min.css', + # 'css/custom.css' + #] # In Sphinx 1.8 it was renamed to `add_css_file`, 1.7 and prior it is # `add_stylesheet` (deprecated in 1.8). 
- add_css = getattr(app, 'add_css_file', app.add_stylesheet) - for css_file in html_css_files: - add_css(css_file) - + #add_css = getattr(app, 'add_css_file', app.add_stylesheet) + #for css_file in html_css_files: + # add_css(css_file) # Custom CSS - # app.add_stylesheet('css/pytorch_theme.css') + #app.add_stylesheet('css/pytorch_theme.css') # app.add_stylesheet('https://fonts.googleapis.com/css?family=Lato') # Custom directives app.add_directive('includenodoc', IncludeDirective) diff --git a/distributed/home.rst b/distributed/home.rst new file mode 100644 index 00000000000..788a5fd9bce --- /dev/null +++ b/distributed/home.rst @@ -0,0 +1,77 @@ +Distributed and Parallel Training Tutorials +=========================================== + +This page includes all distributed and parallel trainings available +at pytorch.org website. + +Getting Started with Distributed Data-Parallel Training (DDP) +------------------------------------------------------------- + +.. grid:: 3 + + .. grid-item-card:: Getting Started with PyTorch Distributed + :shadow: none + :link: https://example.com + :link-type: url + + This tutorial provides a gentle intro to the PyTorch + DistributedData Parallel. + + .. grid-item-card:: Single Machine Model Parallel Best Practices + :shadow: none + :link: https://example.com + :link-type: url + + In this tutorial you will learn about best practices in + using model parallel. + + .. grid-item-card:: Writing Distributed Applications with PyTorch + :shadow: none + :link: https://example.com + :link-type: url + + This tutorial demonstrates how to write a distributed application + with PyTorch. + +Learn FSDP +---------- + +Fully-Sharded Data Parallel (FSDP) is a tool that distributes model +parameters across multiple workers, therefore enabling you to train larger +models. + + +.. grid:: 3 + + .. grid-item-card:: Getting Started with FSDP + :shadow: none + :img-top: ../_static/img/thumbnails/cropped/pytorch-logo.png + :link: https://example.com + :link-type: url + + This tutorial provides a gentle intro to the PyTorch + DistributedData Parallel. + + .. grid-item-card:: Single Machine Model Parallel Best Practices + :shadow: none + :img-top: ../_static/img/thumbnails/cropped/pytorch-logo.png + :link: https://example.com + :link-type: url + + In this tutorial you will learn about best practices in + using model parallel. + + .. grid-item-card:: Writing Distributed Applications with PyTorch + :shadow: none + :img-top: ../_static/img/thumbnails/cropped/pytorch-logo.png + :link: https://example.com + :link-type: url + + This tutorial demonstrates how to write a distributed application + with PyTorch. 
+ +Learn RPC +--------- + +Distributed Remote Procedure Call (RPC) framework provides +mechanisms for multi-machine model training diff --git a/index.rst b/index.rst index 89f04219d87..e1884fbc347 100644 --- a/index.rst +++ b/index.rst @@ -886,6 +886,7 @@ Additional Resources :hidden: :caption: Parallel and Distributed Training + distributed/home beginner/dist_overview intermediate/model_parallel_tutorial intermediate/ddp_tutorial From 930a794c9901a50937046338589eea2021838bb6 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 19 Sep 2022 10:59:43 -0700 Subject: [PATCH 2/9] Rebase --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index dd632f368a4..8362ab3c233 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ # Refer to ./jenkins/build.sh for tutorial build instructions sphinx==5.0.0 -sphinx-gallery==0.11.1 -sphinx-panels +sphinx-gallery==0.9.0 +sphinx_design docutils==0.16 sphinx-copybutton tqdm From 6d724dcaa4a6f1c4754bcd10aaa333c42ecba713 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Wed, 10 Aug 2022 12:12:07 -0700 Subject: [PATCH 3/9] Update --- _static/css/custom.css | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/_static/css/custom.css b/_static/css/custom.css index 11450d0fdaa..24b07ce8bda 100755 --- a/_static/css/custom.css +++ b/_static/css/custom.css @@ -26,8 +26,10 @@ opacity: 0.5; border-radius: 0px; width: 30%; + border: none } + .sd-card-img { opacity: 0.5; width: 200px; @@ -39,30 +41,23 @@ background-color: #f3f4f7; } + .sd-card:after { - display:block; - opacity: 1; - content: ''; - background-color: #fff; - border: none; - border-bottom: solid 1px #ee4c2c; - transform: scaleX(0); - transition: transform .250s ease-in-out; + display: block; + opacity: 1; + content: ''; + border-bottom: solid 1px #ee4c2c; + background-color: #fff; + transform: scaleX(0); + transition: transform .250s ease-in-out; + transform-origin: 0% 50%; } .sd-card:hover { - background-color: #fff; - opacity: 1; - border: none; - border-bottom: solid 1px #ee4c2c; - transition: transform .250s ease-in-out; + background-color: #fff; + opacity: 1; } .sd-card:hover:after { - border: none; transform: scaleX(1); } - -.sd-card:after { - transform-origin: 0% 50%; -} From 20712ff824480a1d58cf316aea73f77b5ccf2f77 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Wed, 10 Aug 2022 13:30:42 -0700 Subject: [PATCH 4/9] Update --- _static/css/custom.css | 3 +++ 1 file changed, 3 insertions(+) diff --git a/_static/css/custom.css b/_static/css/custom.css index 24b07ce8bda..542c4302072 100755 --- a/_static/css/custom.css +++ b/_static/css/custom.css @@ -56,6 +56,9 @@ .sd-card:hover { background-color: #fff; opacity: 1; + border-top: 1px solid #f3f4f7; + border-left: 1px solid #f3f4f7; + border-right: 1px solid #f3f4f7; } .sd-card:hover:after { From 11533db05f74d5fc2bb38636070f4577ee7e34c1 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Thu, 11 Aug 2022 14:26:20 -0700 Subject: [PATCH 5/9] Updated card --- distributed/home.rst | 45 ++++++++++++++------------------------------ 1 file changed, 14 insertions(+), 31 deletions(-) diff --git a/distributed/home.rst b/distributed/home.rst index 788a5fd9bce..e45162649de 100644 --- a/distributed/home.rst +++ b/distributed/home.rst @@ -1,23 +1,29 @@ Distributed and Parallel Training Tutorials =========================================== -This page includes all distributed and parallel trainings available 
+Distributed and at pytorch.org website. Getting Started with Distributed Data-Parallel Training (DDP) ------------------------------------------------------------- + + .. grid:: 3 - .. grid-item-card:: Getting Started with PyTorch Distributed + .. grid-item-card:: :octicon:`file-code;1em` + Getting Started with PyTorch Distributed :shadow: none :link: https://example.com :link-type: url This tutorial provides a gentle intro to the PyTorch DistributedData Parallel. + + :octicon:`code;1em` Code - .. grid-item-card:: Single Machine Model Parallel Best Practices + .. grid-item-card:: :octicon:`file-code;1em` + Single Machine Model Parallel Best Practices :shadow: none :link: https://example.com :link-type: url @@ -25,7 +31,9 @@ Getting Started with Distributed Data-Parallel Training (DDP) In this tutorial you will learn about best practices in using model parallel. - .. grid-item-card:: Writing Distributed Applications with PyTorch + :octicon:`code;1em` Code :octicon:`square-fill;1em` :octicon:`video;1em` Video + + .. grid-item-card:: :octicon:`file-code;1em` Writing Distributed Applications with PyTorch :shadow: none :link: https://example.com :link-type: url @@ -33,6 +41,8 @@ Getting Started with Distributed Data-Parallel Training (DDP) This tutorial demonstrates how to write a distributed application with PyTorch. + :octicon:`code;1em` Code :octicon:`square-fill;1em` :octicon:`video;1em` Video + Learn FSDP ---------- @@ -43,33 +53,6 @@ models. .. grid:: 3 - .. grid-item-card:: Getting Started with FSDP - :shadow: none - :img-top: ../_static/img/thumbnails/cropped/pytorch-logo.png - :link: https://example.com - :link-type: url - - This tutorial provides a gentle intro to the PyTorch - DistributedData Parallel. - - .. grid-item-card:: Single Machine Model Parallel Best Practices - :shadow: none - :img-top: ../_static/img/thumbnails/cropped/pytorch-logo.png - :link: https://example.com - :link-type: url - - In this tutorial you will learn about best practices in - using model parallel. - - .. grid-item-card:: Writing Distributed Applications with PyTorch - :shadow: none - :img-top: ../_static/img/thumbnails/cropped/pytorch-logo.png - :link: https://example.com - :link-type: url - - This tutorial demonstrates how to write a distributed application - with PyTorch. - Learn RPC --------- From 9ce02f83cecd8379bb01acdba1fa8f65433d530c Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 19 Aug 2022 15:29:57 -0700 Subject: [PATCH 6/9] Update --- distributed/home.rst | 139 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 115 insertions(+), 24 deletions(-) diff --git a/distributed/home.rst b/distributed/home.rst index e45162649de..39eb7419c47 100644 --- a/distributed/home.rst +++ b/distributed/home.rst @@ -1,47 +1,53 @@ Distributed and Parallel Training Tutorials =========================================== -Distributed and -at pytorch.org website. +Distributed training is a model training paradigm that involves +spreading training workload across multiple worker nodes, therefore +significantly improving the speed of training and model accuracy. While +distributed training can be used for any type of ML model training, it +is most beneficial to use it for large models and compute demanding +tasks as deep learning. 
-Getting Started with Distributed Data-Parallel Training (DDP) -------------------------------------------------------------- +There are a few ways you can perform distributed training in +PyTorch with each method having their advantages in certain use cases: +* `DistributedDataParallel (DDP) <#learn-ddp>`__ +* `Fully Sharded Data Parallel (FSDP) <#learn-fsdp>`__ +* `Remote Procedure Call (RPC) distributed training <#learn-rpc>`__ +* `Pipeline Parallelism <#learn-pipeline-parallelism>`__ +Read more about these options in [Distributed Overview](../beginner/dist_overview.rst). -.. grid:: 3 +.. _learn-ddp: - .. grid-item-card:: :octicon:`file-code;1em` - Getting Started with PyTorch Distributed - :shadow: none - :link: https://example.com - :link-type: url - - This tutorial provides a gentle intro to the PyTorch - DistributedData Parallel. - - :octicon:`code;1em` Code +Learn DDP +--------- + +.. grid:: 3 .. grid-item-card:: :octicon:`file-code;1em` - Single Machine Model Parallel Best Practices + DDP Intro Video Tutorials :shadow: none :link: https://example.com :link-type: url - In this tutorial you will learn about best practices in - using model parallel. - - :octicon:`code;1em` Code :octicon:`square-fill;1em` :octicon:`video;1em` Video + A step-by-step video series on how to get started with + `DistributedDataParallel` and advance to more complex topics + +++ + :octicon:`code;1em` Code :octicon:`square-fill;1em` :octicon:`video;1em` Video - .. grid-item-card:: :octicon:`file-code;1em` Writing Distributed Applications with PyTorch + .. grid-item-card:: :octicon:`file-code;1em` + Getting Started with PyTorch Distributed :shadow: none :link: https://example.com :link-type: url - This tutorial demonstrates how to write a distributed application - with PyTorch. + This tutorial provides a short and gentle intro to the PyTorch + DistributedData Parallel. + +++ + :octicon:`code;1em` Code - :octicon:`code;1em` Code :octicon:`square-fill;1em` :octicon:`video;1em` Video +.. _learn-fsdp: Learn FSDP ---------- @@ -53,8 +59,93 @@ models. .. grid:: 3 + .. grid-item-card:: :octicon:`file-code;1em` + Getting Started with FSDP + :shadow: none + :link: https://example.com + :link-type: url + + This tutorial demonstrates how you can perform distributed training + with FSDP on a MNIST dataset. + +++ + :octicon:`code;1em` Code + + .. grid-item-card:: :octicon:`file-code;1em` + FSDP Advanced + :shadow: none + :link: https://example.com + :link-type: url + + In this tutorial, you will learn how to fine-tune a HuggingFace (HF) T5 + model with FSDP for text summarization. + +++ + :octicon:`code;1em` Code + +.. _learn-rpc: + Learn RPC --------- Distributed Remote Procedure Call (RPC) framework provides mechanisms for multi-machine model training + +.. grid:: 3 + + .. grid-item-card:: :octicon:`file-code;1em` + Getting Started with Distributed RPC Framework + :shadow: none + :link: https://example.com + :link-type: url + + This tutorial demonstrates how to get started with RPC-based distributed + training. + +++ + :octicon:`code;1em` Code + + .. grid-item-card:: :octicon:`file-code;1em` + Implementing a Parameter Server Using Distributed RPC Framework + :shadow: none + :link: https://example.com + :link-type: url + + This tutorial walks you through a simple example of implementing a + parameter server using PyTorch’s Distributed RPC framework. + +++ + :octicon:`code;1em` Code + + .. 
grid-item-card:: :octicon:`file-code;1em` + Distributed Pipeline Parallelism Using RPC + :shadow: none + :link: https://example.com + :link-type: url + + Learn how to use a Resnet50 model for distributed pipeline parallelism + with the Distributed RPC APIs. + +++ + :octicon:`code;1em` Code + +.. grid:: 3 + + .. grid-item-card:: :octicon:`file-code;1em` + Implementing Batch RPC Processing Using Asynchronous Executions + :shadow: none + :link: https://example.com + :link-type: url + + In this tutorial you will build batch-processing RPC applications + with the @rpc.functions.async_execution decorator. + +++ + :octicon:`code;1em` Code + + .. grid-item-card:: :octicon:`file-code;1em` + Combining Distributed DataParallel with Distributed RPC Framework + :shadow: none + :link: https://example.com + :link-type: url + + In this tutorial you will learn how to combine distributed data + parallelism with distributed model parallelism. + +++ + :octicon:`code;1em` Code + +.. _learn-pipeline-parallelism: From d7524ffab52cfca51814123d69800cb01208e51f Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 19 Sep 2022 10:58:43 -0700 Subject: [PATCH 7/9] Update --- distributed/home.rst | 54 ++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/distributed/home.rst b/distributed/home.rst index 39eb7419c47..9a857a1b965 100644 --- a/distributed/home.rst +++ b/distributed/home.rst @@ -14,9 +14,9 @@ PyTorch with each method having their advantages in certain use cases: * `DistributedDataParallel (DDP) <#learn-ddp>`__ * `Fully Sharded Data Parallel (FSDP) <#learn-fsdp>`__ * `Remote Procedure Call (RPC) distributed training <#learn-rpc>`__ -* `Pipeline Parallelism <#learn-pipeline-parallelism>`__ +* `Custom Extensions <#custom-extensions>`__ -Read more about these options in [Distributed Overview](../beginner/dist_overview.rst). +Read more about these options in `Distributed Overview <../beginner/dist_overview.html>`__. .. _learn-ddp: @@ -47,16 +47,23 @@ Learn DDP +++ :octicon:`code;1em` Code + .. grid-item-card:: :octicon:`file-code;1em` + Distributed Training with Uneven Inputs Using + the Join Context Manager + :shadow: none + :link: ../advanced_source/generic_join.rst + :link-type: url + + This tutorial provides a short and gentle intro to the PyTorch + DistributedData Parallel. + +++ + :octicon:`code;1em` Code + .. _learn-fsdp: Learn FSDP ---------- -Fully-Sharded Data Parallel (FSDP) is a tool that distributes model -parameters across multiple workers, therefore enabling you to train larger -models. - - .. grid:: 3 .. grid-item-card:: :octicon:`file-code;1em` @@ -86,9 +93,6 @@ models. Learn RPC --------- -Distributed Remote Procedure Call (RPC) framework provides -mechanisms for multi-machine model training - .. grid:: 3 .. grid-item-card:: :octicon:`file-code;1em` @@ -114,38 +118,44 @@ mechanisms for multi-machine model training :octicon:`code;1em` Code .. grid-item-card:: :octicon:`file-code;1em` - Distributed Pipeline Parallelism Using RPC + Implementing Batch RPC Processing Using Asynchronous Executions :shadow: none :link: https://example.com :link-type: url - Learn how to use a Resnet50 model for distributed pipeline parallelism - with the Distributed RPC APIs. + In this tutorial you will build batch-processing RPC applications + with the @rpc.functions.async_execution decorator. +++ :octicon:`code;1em` Code .. grid:: 3 .. 
grid-item-card:: :octicon:`file-code;1em` - Implementing Batch RPC Processing Using Asynchronous Executions + Combining Distributed DataParallel with Distributed RPC Framework :shadow: none :link: https://example.com :link-type: url - In this tutorial you will build batch-processing RPC applications - with the @rpc.functions.async_execution decorator. + In this tutorial you will learn how to combine distributed data + parallelism with distributed model parallelism. +++ :octicon:`code;1em` Code +.. _custom-extensions: + +Custom Extensions +----------------- + +.. grid:: 3 + .. grid-item-card:: :octicon:`file-code;1em` - Combining Distributed DataParallel with Distributed RPC Framework + Customize Process Group Backends Using Cpp Extensions :shadow: none - :link: https://example.com + :link: intermediate/process_group_cpp_extension_tutorial.html :link-type: url - In this tutorial you will learn how to combine distributed data - parallelism with distributed model parallelism. + In this tutorial you will learn to implement a custom `ProcessGroup` + backend and plug that into PyTorch distributed package using + cpp extensions. +++ :octicon:`code;1em` Code - -.. _learn-pipeline-parallelism: From 340a4ca4e6ae8f567ae95bb043466633038ed9fe Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 19 Sep 2022 13:07:31 -0700 Subject: [PATCH 8/9] Update --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8362ab3c233..048f6c08bab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ # Refer to ./jenkins/build.sh for tutorial build instructions sphinx==5.0.0 -sphinx-gallery==0.9.0 +sphinx-gallery==0.11.1 sphinx_design docutils==0.16 sphinx-copybutton From 2200ff13be9b13e6d182e989e1b9cdc7aca8b577 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 4 Oct 2022 09:08:32 -0700 Subject: [PATCH 9/9] Add links --- distributed/home.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/distributed/home.rst b/distributed/home.rst index 9a857a1b965..8bc3ade4293 100644 --- a/distributed/home.rst +++ b/distributed/home.rst @@ -28,7 +28,7 @@ Learn DDP .. grid-item-card:: :octicon:`file-code;1em` DDP Intro Video Tutorials :shadow: none - :link: https://example.com + :link: https://pytorch.org/tutorials/beginner/ddp_series_intro.html?utm_source=distr_landing&utm_medium=ddp_series_intro :link-type: url A step-by-step video series on how to get started with @@ -37,9 +37,9 @@ Learn DDP :octicon:`code;1em` Code :octicon:`square-fill;1em` :octicon:`video;1em` Video .. grid-item-card:: :octicon:`file-code;1em` - Getting Started with PyTorch Distributed + Getting Started with Distributed Data Parallel :shadow: none - :link: https://example.com + :link: https://pytorch.org/tutorials/intermediate/ddp_tutorial.html?utm_source=distr_landing&utm_medium=intermediate_ddp_tutorial :link-type: url This tutorial provides a short and gentle intro to the PyTorch @@ -51,7 +51,7 @@ Learn DDP Distributed Training with Uneven Inputs Using the Join Context Manager :shadow: none - :link: ../advanced_source/generic_join.rst + :link: https://pytorch.org/tutorials/advanced/generic_join.html?utm_source=distr_landing&utm_medium=generic_join :link-type: url This tutorial provides a short and gentle intro to the PyTorch @@ -69,7 +69,7 @@ Learn FSDP .. 
grid-item-card:: :octicon:`file-code;1em` Getting Started with FSDP :shadow: none - :link: https://example.com + :link: https://pytorch.org/tutorials/intermediate/FSDP_tutorial.html?utm_source=distr_landing&utm_medium=FSDP_getting_started :link-type: url This tutorial demonstrates how you can perform distributed training @@ -80,7 +80,7 @@ Learn FSDP .. grid-item-card:: :octicon:`file-code;1em` FSDP Advanced :shadow: none - :link: https://example.com + :link: https://pytorch.org/tutorials/intermediate/FSDP_adavnced_tutorial.html?utm_source=distr_landing&utm_medium=FSDP_advanced :link-type: url In this tutorial, you will learn how to fine-tune a HuggingFace (HF) T5 @@ -98,7 +98,7 @@ Learn RPC .. grid-item-card:: :octicon:`file-code;1em` Getting Started with Distributed RPC Framework :shadow: none - :link: https://example.com + :link: https://pytorch.org/tutorials/intermediate/rpc_tutorial.html?utm_source=distr_landing&utm_medium=rpc_getting_started :link-type: url This tutorial demonstrates how to get started with RPC-based distributed @@ -109,7 +109,7 @@ Learn RPC .. grid-item-card:: :octicon:`file-code;1em` Implementing a Parameter Server Using Distributed RPC Framework :shadow: none - :link: https://example.com + :link: https://pytorch.org/tutorials/intermediate/rpc_param_server_tutorial.html?utm_source=distr_landing&utm_medium=rpc_param_server_tutorial :link-type: url This tutorial walks you through a simple example of implementing a @@ -120,7 +120,7 @@ Learn RPC .. grid-item-card:: :octicon:`file-code;1em` Implementing Batch RPC Processing Using Asynchronous Executions :shadow: none - :link: https://example.com + :link: https://pytorch.org/tutorials/intermediate/rpc_async_execution.html?utm_source=distr_landing&utm_medium=rpc_async_execution :link-type: url In this tutorial you will build batch-processing RPC applications @@ -133,7 +133,7 @@ Learn RPC .. grid-item-card:: :octicon:`file-code;1em` Combining Distributed DataParallel with Distributed RPC Framework :shadow: none - :link: https://example.com + :link: https://pytorch.org/tutorials/advanced/rpc_ddp_tutorial.html?utm_source=distr_landing&utm_medium=rpc_plus_ddp :link-type: url In this tutorial you will learn how to combine distributed data @@ -151,7 +151,7 @@ Custom Extensions .. grid-item-card:: :octicon:`file-code;1em` Customize Process Group Backends Using Cpp Extensions :shadow: none - :link: intermediate/process_group_cpp_extension_tutorial.html + :link: https://pytorch.org/tutorials/intermediate/process_group_cpp_extension_tutorial.html?utm_source=distr_landing&utm_medium=custom_extensions_cpp :link-type: url In this tutorial you will learn to implement a custom `ProcessGroup`
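
None of the pages added in this series contain runnable code, so here is a rough sketch of the single-process-per-rank pattern that the DDP tutorials linked above start from. The toy model, the ``gloo`` backend, and the ``torchrun`` launch are assumptions for illustration, not details taken from those tutorials::

    """Minimal DistributedDataParallel sketch (CPU/gloo, launched via torchrun) -- an
    illustrative assumption, not code from the tutorials linked on the landing page."""
    import torch
    import torch.distributed as dist
    import torch.nn as nn
    from torch.nn.parallel import DistributedDataParallel as DDP


    def main():
        # torchrun sets RANK, WORLD_SIZE, MASTER_ADDR and MASTER_PORT,
        # so the default env:// rendezvous needs no extra arguments.
        dist.init_process_group(backend="gloo")

        model = nn.Linear(10, 1)   # toy model; every rank holds a full replica
        ddp_model = DDP(model)     # wraps the model so gradients are all-reduced

        optimizer = torch.optim.SGD(ddp_model.parameters(), lr=0.01)
        loss_fn = nn.MSELoss()

        # One toy training step; a real job would loop over batches from a
        # DataLoader driven by torch.utils.data.DistributedSampler.
        inputs, labels = torch.randn(20, 10), torch.randn(20, 1)
        optimizer.zero_grad()
        loss_fn(ddp_model(inputs), labels).backward()
        optimizer.step()

        dist.destroy_process_group()


    if __name__ == "__main__":
        main()

Saved as, say, ``ddp_sketch.py`` (a hypothetical file name), this would be launched with
``torchrun --nproc_per_node=2 ddp_sketch.py``; ``torchrun`` supplies the rank and
world-size environment variables that ``init_process_group`` reads.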