Commit 4c1d4bd

Merge branch 'main' into jingxu10/torch_113522
2 parents 6af540e + a58f40f · commit 4c1d4bd

4 files changed: +29 -26 lines changed


.github/scripts/docathon-label-sync.py

Lines changed: 3 additions & 3 deletions
@@ -25,19 +25,19 @@ def main():
     issue_number = int(re.findall(r'#(\d{1,5})', pull_request_body)[0])
     issue = repo.get_issue(issue_number)
     issue_labels = issue.labels
-    docathon_label_present = any(label.name == 'docathon-h2-2023' for label in issue_labels)
+    docathon_label_present = any(label.name == 'docathon-h1-2024' for label in issue_labels)
 
     # if the issue has a docathon label, add all labels from the issue to the PR.
     if not docathon_label_present:
-        print("The 'docathon-h2-2023' label is not present in the issue.")
+        print("The 'docathon-h1-2024' label is not present in the issue.")
         return
     pull_request_labels = pull_request.get_labels()
     issue_label_names = [label.name for label in issue_labels]
     labels_to_add = [label for label in issue_label_names if label not in pull_request_labels]
     if not labels_to_add:
         print("The pull request already has the same labels.")
         return
-    pull_request.set_labels(*labels_to_add)
+    pull_request.add_to_labels(*labels_to_add)
     print("Labels added to the pull request!")

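Editor's note: a minimal sketch (not part of this commit) of what the last change above does. In PyGithub, set_labels() replaces the pull request's entire label set, while add_to_labels() appends to whatever is already there. The token, repository name, and PR number below are placeholders.

# Minimal sketch, assuming PyGithub is installed and a token with repo scope.
# The repository name and PR number are placeholders, not values from this commit.
from github import Github

gh = Github("<access-token>")
repo = gh.get_repo("pytorch/tutorials")
pr = repo.get_pull(1234)  # hypothetical PR number

# set_labels() would overwrite every label currently on the PR:
# pr.set_labels("docathon-h1-2024", "easy")

# add_to_labels() appends, keeping labels added by reviewers or other automation:
pr.add_to_labels("docathon-h1-2024", "easy")
print([label.name for label in pr.get_labels()])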
.github/workflows/docathon-label-sync.yml

Lines changed: 4 additions & 2 deletions
@@ -7,12 +7,14 @@ on:
 jobs:
   check-labels:
     runs-on: ubuntu-latest
-
+    permissions:
+      issues: write
+      pull-requests: write
     steps:
       - name: Check if PR mentions an issue and get labels
         uses: actions/checkout@v2
         with:
-          fetch-depth: 0
+          fetch-depth: 1
       - name: Set up Python
         uses: actions/setup-python@v2
         with:

intermediate_source/dist_tuto.rst

Lines changed: 1 addition & 1 deletion
@@ -327,7 +327,7 @@ the following few lines:
                                  transforms.Normalize((0.1307,), (0.3081,))
                              ]))
     size = dist.get_world_size()
-    bsz = 128 / float(size)
+    bsz = 128 // size
     partition_sizes = [1.0 / size for _ in range(size)]
     partition = DataPartitioner(dataset, partition_sizes)
     partition = partition.use(dist.get_rank())

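Editor's note: a small sketch (not part of the commit) of why integer division matters here. The computed value is later passed to a DataLoader as batch_size, which must be an int, whereas 128 / float(size) yields a float. The toy dataset and world size below are stand-ins for the tutorial's MNIST dataset and dist.get_world_size().

# Minimal sketch, assuming PyTorch is installed.
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.randn(256, 10))  # toy stand-in for MNIST
world_size = 4                                 # stand-in for dist.get_world_size()

bsz = 128 // world_size  # 32, an int
# bsz = 128 / float(world_size) would give 32.0, and a float batch_size makes
# DataLoader raise an error in current PyTorch releases.
loader = DataLoader(dataset, batch_size=bsz, shuffle=True)
print(bsz, len(loader))  # 32 8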
recipes_source/recipes/tuning_guide.py

Lines changed: 21 additions & 20 deletions
@@ -94,35 +94,36 @@
 # ``optimizer.zero_grad(set_to_none=True)``.
 
 ###############################################################################
-# Fuse pointwise operations
+# Fuse operations
 # ~~~~~~~~~~~~~~~~~~~~~~~~~
-# Pointwise operations (elementwise addition, multiplication, math functions -
-# ``sin()``, ``cos()``, ``sigmoid()`` etc.) can be fused into a single kernel
-# to amortize memory access time and kernel launch time.
-#
-# `PyTorch JIT <https://pytorch.org/docs/stable/jit.html>`_ can fuse kernels
-# automatically, although there could be additional fusion opportunities not yet
-# implemented in the compiler, and not all device types are supported equally.
-#
-# Pointwise operations are memory-bound, for each operation PyTorch launches a
-# separate kernel. Each kernel loads data from the memory, performs computation
-# (this step is usually inexpensive) and stores results back into the memory.
-#
-# Fused operator launches only one kernel for multiple fused pointwise ops and
-# loads/stores data only once to the memory. This makes JIT very useful for
-# activation functions, optimizers, custom RNN cells etc.
+# Pointwise operations such as elementwise addition, multiplication, and math
+# functions like `sin()`, `cos()`, `sigmoid()`, etc., can be combined into a
+# single kernel. This fusion helps reduce memory access and kernel launch times.
+# Typically, pointwise operations are memory-bound; PyTorch eager-mode initiates
+# a separate kernel for each operation, which involves loading data from memory,
+# executing the operation (often not the most time-consuming step), and writing
+# the results back to memory.
+#
+# By using a fused operator, only one kernel is launched for multiple pointwise
+# operations, and data is loaded and stored just once. This efficiency is
+# particularly beneficial for activation functions, optimizers, and custom RNN cells, etc.
+#
+# PyTorch 2 introduces a compile mode facilitated by TorchInductor, an underlying compiler
+# that automatically fuses kernels. TorchInductor extends its capabilities beyond simple
+# element-wise operations, enabling advanced fusion of eligible pointwise and reduction
+# operations for optimized performance.
 #
 # In the simplest case fusion can be enabled by applying
-# `torch.jit.script <https://pytorch.org/docs/stable/generated/torch.jit.script.html#torch.jit.script>`_
+# `torch.compile <https://pytorch.org/docs/stable/generated/torch.compile.html>`_
 # decorator to the function definition, for example:
 
-@torch.jit.script
-def fused_gelu(x):
+@torch.compile
+def gelu(x):
     return x * 0.5 * (1.0 + torch.erf(x / 1.41421))
 
 ###############################################################################
 # Refer to
-# `TorchScript documentation <https://pytorch.org/docs/stable/jit.html>`_
+# `Introduction to torch.compile <https://pytorch.org/tutorials/intermediate/torch_compile_tutorial.html>`_
 # for more advanced use cases.
 
 ###############################################################################

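Editor's note: a usage sketch (not part of the commit) for the compiled function above. It is called like any other Python function; the first call triggers compilation through TorchInductor, and later calls reuse the generated kernel. The input shape is illustrative, and a PyTorch 2.x install is assumed.

# Usage sketch, assuming PyTorch 2.x; shapes and device are illustrative only.
import torch

@torch.compile
def gelu(x):
    return x * 0.5 * (1.0 + torch.erf(x / 1.41421))

x = torch.randn(1024, 1024)   # move to .cuda() if a GPU is available
y = gelu(x)                   # first call compiles; later calls reuse the kernel

# Eager reference: results should match within floating-point tolerance.
y_ref = x * 0.5 * (1.0 + torch.erf(x / 1.41421))
print(torch.allclose(y, y_ref, atol=1e-6))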