Commit 4c1d4bd

Merge branch 'main' into jingxu10/torch_113522
2 parents 6af540e + a58f40f · commit 4c1d4bd

4 files changed: +29 -26 lines changed


.github/scripts/docathon-label-sync.py

Lines changed: 3 additions & 3 deletions
@@ -25,19 +25,19 @@ def main():
     issue_number = int(re.findall(r'#(\d{1,5})', pull_request_body)[0])
     issue = repo.get_issue(issue_number)
     issue_labels = issue.labels
-    docathon_label_present = any(label.name == 'docathon-h2-2023' for label in issue_labels)
+    docathon_label_present = any(label.name == 'docathon-h1-2024' for label in issue_labels)
 
     # if the issue has a docathon label, add all labels from the issue to the PR.
     if not docathon_label_present:
-        print("The 'docathon-h2-2023' label is not present in the issue.")
+        print("The 'docathon-h1-2024' label is not present in the issue.")
         return
     pull_request_labels = pull_request.get_labels()
     issue_label_names = [label.name for label in issue_labels]
     labels_to_add = [label for label in issue_label_names if label not in pull_request_labels]
     if not labels_to_add:
         print("The pull request already has the same labels.")
         return
-    pull_request.set_labels(*labels_to_add)
+    pull_request.add_to_labels(*labels_to_add)
     print("Labels added to the pull request!")

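Editor's note: a minimal sketch (not part of this commit) of what the last change above does. In PyGithub, set_labels() replaces the pull request's entire label set, while add_to_labels() appends to whatever is already there. The token, repository name, and PR number below are placeholders.

# Minimal sketch, assuming PyGithub is installed and a token with repo scope.
# The repository name and PR number are placeholders, not values from this commit.
from github import Github

gh = Github("<access-token>")
repo = gh.get_repo("pytorch/tutorials")
pr = repo.get_pull(1234)  # hypothetical PR number

# set_labels() would overwrite every label currently on the PR:
# pr.set_labels("docathon-h1-2024", "easy")

# add_to_labels() appends, keeping labels added by reviewers or other automation:
pr.add_to_labels("docathon-h1-2024", "easy")
print([label.name for label in pr.get_labels()])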
.github/workflows/docathon-label-sync.yml

Lines changed: 4 additions & 2 deletions
@@ -7,12 +7,14 @@ on:
 jobs:
   check-labels:
     runs-on: ubuntu-latest
-
+    permissions:
+      issues: write
+      pull-requests: write
     steps:
       - name: Check if PR mentions an issue and get labels
         uses: actions/checkout@v2
         with:
-          fetch-depth: 0
+          fetch-depth: 1
       - name: Set up Python
         uses: actions/setup-python@v2
         with:

intermediate_source/dist_tuto.rst

Lines changed: 1 addition & 1 deletion
@@ -327,7 +327,7 @@ the following few lines:
                                  transforms.Normalize((0.1307,), (0.3081,))
                              ]))
     size = dist.get_world_size()
-    bsz = 128 / float(size)
+    bsz = 128 // size
     partition_sizes = [1.0 / size for _ in range(size)]
     partition = DataPartitioner(dataset, partition_sizes)
     partition = partition.use(dist.get_rank())

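Editor's note: a small sketch (not part of the commit) of why integer division matters here. The computed value is later passed to a DataLoader as batch_size, which must be an int, whereas 128 / float(size) yields a float. The toy dataset and world size below are stand-ins for the tutorial's MNIST dataset and dist.get_world_size().

# Minimal sketch, assuming PyTorch is installed.
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.randn(256, 10))  # toy stand-in for MNIST
world_size = 4                                 # stand-in for dist.get_world_size()

bsz = 128 // world_size  # 32, an int
# bsz = 128 / float(world_size) would give 32.0, and a float batch_size makes
# DataLoader raise an error in current PyTorch releases.
loader = DataLoader(dataset, batch_size=bsz, shuffle=True)
print(bsz, len(loader))  # 32 8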
recipes_source/recipes/tuning_guide.py

Lines changed: 21 additions & 20 deletions
@@ -94,35 +94,36 @@
 # ``optimizer.zero_grad(set_to_none=True)``.
 
 ###############################################################################
-# Fuse pointwise operations
+# Fuse operations
 # ~~~~~~~~~~~~~~~~~~~~~~~~~
-# Pointwise operations (elementwise addition, multiplication, math functions -
-# ``sin()``, ``cos()``, ``sigmoid()`` etc.) can be fused into a single kernel
-# to amortize memory access time and kernel launch time.
-#
-# `PyTorch JIT <https://pytorch.org/docs/stable/jit.html>`_ can fuse kernels
-# automatically, although there could be additional fusion opportunities not yet
-# implemented in the compiler, and not all device types are supported equally.
-#
-# Pointwise operations are memory-bound, for each operation PyTorch launches a
-# separate kernel. Each kernel loads data from the memory, performs computation
-# (this step is usually inexpensive) and stores results back into the memory.
-#
-# Fused operator launches only one kernel for multiple fused pointwise ops and
-# loads/stores data only once to the memory. This makes JIT very useful for
-# activation functions, optimizers, custom RNN cells etc.
+# Pointwise operations such as elementwise addition, multiplication, and math
+# functions like `sin()`, `cos()`, `sigmoid()`, etc., can be combined into a
+# single kernel. This fusion helps reduce memory access and kernel launch times.
+# Typically, pointwise operations are memory-bound; PyTorch eager-mode initiates
+# a separate kernel for each operation, which involves loading data from memory,
+# executing the operation (often not the most time-consuming step), and writing
+# the results back to memory.
+#
+# By using a fused operator, only one kernel is launched for multiple pointwise
+# operations, and data is loaded and stored just once. This efficiency is
+# particularly beneficial for activation functions, optimizers, and custom RNN cells, etc.
+#
+# PyTorch 2 introduces a compile mode facilitated by TorchInductor, an underlying compiler
+# that automatically fuses kernels. TorchInductor extends its capabilities beyond simple
+# element-wise operations, enabling advanced fusion of eligible pointwise and reduction
+# operations for optimized performance.
 #
 # In the simplest case fusion can be enabled by applying
-# `torch.jit.script <https://pytorch.org/docs/stable/generated/torch.jit.script.html#torch.jit.script>`_
+# `torch.compile <https://pytorch.org/docs/stable/generated/torch.compile.html>`_
 # decorator to the function definition, for example:
 
-@torch.jit.script
-def fused_gelu(x):
+@torch.compile
+def gelu(x):
     return x * 0.5 * (1.0 + torch.erf(x / 1.41421))
 
 ###############################################################################
 # Refer to
-# `TorchScript documentation <https://pytorch.org/docs/stable/jit.html>`_
+# `Introduction to torch.compile <https://pytorch.org/tutorials/intermediate/torch_compile_tutorial.html>`_
 # for more advanced use cases.
 
 ###############################################################################

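Editor's note: a usage sketch (not part of the commit) for the compiled function above. It is called like any other Python function; the first call triggers compilation through TorchInductor, and later calls reuse the generated kernel. The input shape is illustrative, and a PyTorch 2.x install is assumed.

# Usage sketch, assuming PyTorch 2.x; shapes and device are illustrative only.
import torch

@torch.compile
def gelu(x):
    return x * 0.5 * (1.0 + torch.erf(x / 1.41421))

x = torch.randn(1024, 1024)   # move to .cuda() if a GPU is available
y = gelu(x)                   # first call compiles; later calls reuse the kernel

# Eager reference: results should match within floating-point tolerance.
y_ref = x * 0.5 * (1.0 + torch.erf(x / 1.41421))
print(torch.allclose(y, y_ref, atol=1e-6))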