From 987311dc521e56e30ee6d6b7230772eaf62877ef Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Wed, 14 May 2025 12:46:28 -0700 Subject: [PATCH 1/5] Improve discoverability of the transformer_building_blocks tutorial --- intermediate_source/transformer_building_blocks.py | 3 +++ prototype_source/nestedtensor.py | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/intermediate_source/transformer_building_blocks.py b/intermediate_source/transformer_building_blocks.py index 36b2019f19f..67860b85b79 100644 --- a/intermediate_source/transformer_building_blocks.py +++ b/intermediate_source/transformer_building_blocks.py @@ -1,4 +1,7 @@ """ +.. meta:: + :description: Learn how to optimize transformer models by replacing nn.Transformer with Nested Tensors and torch.compile() for significant performance gains in PyTorch. + Accelerating PyTorch Transformers by replacing ``nn.Transformer`` with Nested Tensors and ``torch.compile()`` ============================================================================================================= **Author:** `Mikayla Gawarecki `_ diff --git a/prototype_source/nestedtensor.py b/prototype_source/nestedtensor.py index ecf099c1e02..6578cf73454 100644 --- a/prototype_source/nestedtensor.py +++ b/prototype_source/nestedtensor.py @@ -369,3 +369,8 @@ def benchmark(func, *args, **kwargs): # how implement multi-head attention for transformers in a way that avoids computation on padding. # For more information, check out the docs for the # `torch.nested `__ namespace. 
+# +# See Also +# -------- +# +# * `Accelerating PyTorch Transformers by replacing nn.Transformer with Nested Tensors and torch.compile() Date: Wed, 14 May 2025 13:02:31 -0700 Subject: [PATCH 2/5] Update --- prototype_source/prototype_index.rst | 52 ++++++++++++++-------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/prototype_source/prototype_index.rst b/prototype_source/prototype_index.rst index c4986681dd6..1dc2769c888 100644 --- a/prototype_source/prototype_index.rst +++ b/prototype_source/prototype_index.rst @@ -294,29 +294,29 @@ Prototype features are not available as part of binary distributions like PyPI o .. toctree:: :hidden: - prototype/context_parallel.html - prototype/fx_graph_mode_quant_guide.html - prototype/fx_graph_mode_ptq_dynamic.html - prototype/fx_graph_mode_ptq_static.html - prototype/flight_recorder_tutorial.html - prototype/graph_mode_dynamic_bert_tutorial.html - prototype/inductor_cpp_wrapper_tutorial.html - prototype/inductor_windows.html - prototype/pt2e_quantizer.html - prototype/pt2e_quant_ptq.html - prototype/pt2e_quant_qat.html - prototype/ios_gpu_workflow.html - prototype/nnapi_mobilenetv2.html - prototype/tracing_based_selective_build.html - prototype/ios_coreml_workflow.html - prototype/numeric_suite_tutorial.html - prototype/torchscript_freezing.html - prototype/vmap_recipe.html - prototype/vulkan_workflow.html - prototype/nestedtensor.html - prototype/maskedtensor_overview.html - prototype/maskedtensor_sparsity.html - prototype/maskedtensor_advanced_semantics.html - prototype/maskedtensor_adagrad.html - prototype/python_extension_autoload.html - prototype/max_autotune_CPU_with_gemm_template_tutorial.html + prototype/context_parallel + prototype/fx_graph_mode_quant_guide + prototype/fx_graph_mode_ptq_dynamic + prototype/fx_graph_mode_ptq_static + prototype/flight_recorder_tutorial + prototype/graph_mode_dynamic_bert_tutorial + prototype/inductor_cpp_wrapper_tutorial + prototype/inductor_windows + 
prototype/pt2e_quantizer + prototype/pt2e_quant_ptq + prototype/pt2e_quant_qat + prototype/ios_gpu_workflow + prototype/nnapi_mobilenetv2 + prototype/tracing_based_selective_build + prototype/ios_coreml_workflow + prototype/numeric_suite_tutorial + prototype/torchscript_freezing + prototype/vmap_recipe + prototype/vulkan_workflow + prototype/nestedtensor + prototype/maskedtensor_overview + prototype/maskedtensor_sparsity + prototype/maskedtensor_advanced_semantics + prototype/maskedtensor_adagrad + prototype/python_extension_autoload + prototype/max_autotune_CPU_with_gemm_template_tutorial From f28ce221d19695e25d7a3046a4d6255d9a012486 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Wed, 14 May 2025 13:22:56 -0700 Subject: [PATCH 3/5] Update --- .ci/docker/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/docker/requirements.txt b/.ci/docker/requirements.txt index e6802cb045e..9730af6c03b 100644 --- a/.ci/docker/requirements.txt +++ b/.ci/docker/requirements.txt @@ -44,7 +44,7 @@ accelerate>=0.20.1 importlib-metadata==6.8.0 # PyTorch Theme --e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme +-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b2f32537f5e56086cdd35dcef7ca9fb6ed956543#egg=pytorch_sphinx_theme ipython From e69208acac022bc674b3eea60c39c63769fd52be Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Wed, 14 May 2025 13:24:19 -0700 Subject: [PATCH 4/5] Update --- .ci/docker/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/docker/requirements.txt b/.ci/docker/requirements.txt index 9730af6c03b..d80617b9df4 100644 --- a/.ci/docker/requirements.txt +++ b/.ci/docker/requirements.txt @@ -44,7 +44,7 @@ accelerate>=0.20.1 importlib-metadata==6.8.0 # PyTorch Theme --e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b2f32537f5e56086cdd35dcef7ca9fb6ed956543#egg=pytorch_sphinx_theme +-e 
git+https://github.com/pytorch/pytorch_sphinx_theme.git@b2f32537f5e56086cdd35dcef7ca9fb6ed956543#egg=pytorch_sphinx_theme2 ipython From e4ffffddde971419651a8a6384edc63b91cf20e9 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Wed, 14 May 2025 14:13:49 -0700 Subject: [PATCH 5/5] Update --- .ci/docker/requirements.txt | 2 +- prototype_source/prototype_index.rst | 52 ++++++++++++++-------------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/.ci/docker/requirements.txt b/.ci/docker/requirements.txt index d80617b9df4..e6802cb045e 100644 --- a/.ci/docker/requirements.txt +++ b/.ci/docker/requirements.txt @@ -44,7 +44,7 @@ accelerate>=0.20.1 importlib-metadata==6.8.0 # PyTorch Theme --e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b2f32537f5e56086cdd35dcef7ca9fb6ed956543#egg=pytorch_sphinx_theme2 +-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme ipython diff --git a/prototype_source/prototype_index.rst b/prototype_source/prototype_index.rst index 1dc2769c888..c4986681dd6 100644 --- a/prototype_source/prototype_index.rst +++ b/prototype_source/prototype_index.rst @@ -294,29 +294,29 @@ Prototype features are not available as part of binary distributions like PyPI o .. 
toctree:: :hidden: - prototype/context_parallel - prototype/fx_graph_mode_quant_guide - prototype/fx_graph_mode_ptq_dynamic - prototype/fx_graph_mode_ptq_static - prototype/flight_recorder_tutorial - prototype/graph_mode_dynamic_bert_tutorial - prototype/inductor_cpp_wrapper_tutorial - prototype/inductor_windows - prototype/pt2e_quantizer - prototype/pt2e_quant_ptq - prototype/pt2e_quant_qat - prototype/ios_gpu_workflow - prototype/nnapi_mobilenetv2 - prototype/tracing_based_selective_build - prototype/ios_coreml_workflow - prototype/numeric_suite_tutorial - prototype/torchscript_freezing - prototype/vmap_recipe - prototype/vulkan_workflow - prototype/nestedtensor - prototype/maskedtensor_overview - prototype/maskedtensor_sparsity - prototype/maskedtensor_advanced_semantics - prototype/maskedtensor_adagrad - prototype/python_extension_autoload - prototype/max_autotune_CPU_with_gemm_template_tutorial + prototype/context_parallel.html + prototype/fx_graph_mode_quant_guide.html + prototype/fx_graph_mode_ptq_dynamic.html + prototype/fx_graph_mode_ptq_static.html + prototype/flight_recorder_tutorial.html + prototype/graph_mode_dynamic_bert_tutorial.html + prototype/inductor_cpp_wrapper_tutorial.html + prototype/inductor_windows.html + prototype/pt2e_quantizer.html + prototype/pt2e_quant_ptq.html + prototype/pt2e_quant_qat.html + prototype/ios_gpu_workflow.html + prototype/nnapi_mobilenetv2.html + prototype/tracing_based_selective_build.html + prototype/ios_coreml_workflow.html + prototype/numeric_suite_tutorial.html + prototype/torchscript_freezing.html + prototype/vmap_recipe.html + prototype/vulkan_workflow.html + prototype/nestedtensor.html + prototype/maskedtensor_overview.html + prototype/maskedtensor_sparsity.html + prototype/maskedtensor_advanced_semantics.html + prototype/maskedtensor_adagrad.html + prototype/python_extension_autoload.html + prototype/max_autotune_CPU_with_gemm_template_tutorial.html