
Commit 41cac6c

williamwen42 and Svetlana Karslioglu committed
torch.compile tutorial update for pt2 stable release (#2224)
* torch.compile tutorial update for pt2 stable release
* Update torch_compile_tutorial.py
* remove speedup numbers

Co-authored-by: Svetlana Karslioglu <svekars@fb.com>
1 parent ccb38b4 commit 41cac6c

File tree

3 files changed: 14 additions, 520 deletions

.jenkins/validate_tutorials_built.py

Lines changed: 1 addition & 3 deletions
@@ -38,7 +38,6 @@
     "recipes_source/recipes/profiler_recipe",
     "recipes_source/recipes/save_load_across_devices",
     "recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model",
-    "intermediate_source/torch_compile_tutorial_",
     "recipes_source/recipes/dynamic_quantization",
     "recipes_source/recipes/saving_and_loading_a_general_checkpoint",
     "recipes_source/recipes/benchmark",
@@ -49,10 +48,9 @@
     "recipes_source/recipes/amp_recipe",
     "recipes_source/recipes/Captum_Recipe",
     "intermediate_source/flask_rest_api_tutorial",
-    "intermediate_source/text_to_speech_with_torchaudio",
+    "intermediate_source/text_to_speech_with_torchaudio"
 ]
 
-
 def tutorial_source_dirs() -> List[Path]:
     return [
         p.relative_to(REPO_ROOT).with_name(p.stem[:-7])

intermediate_source/torch_compile_tutorial_.py renamed to intermediate_source/torch_compile_tutorial.py

Lines changed: 13 additions & 11 deletions
@@ -28,7 +28,7 @@
 #
 # **Required pip Dependencies**
 #
-# - ``torch >= 1.14``
+# - ``torch >= 2.0``
 # - ``torchvision``
 # - ``numpy``
 # - ``scipy``
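The dependency bump means the tutorial now assumes the stable 2.0 release rather than a 1.14 pre-release build. A quick, illustrative way to verify an environment satisfies this before running the tutorial (this check is ours, not part of the changed file):

import torch

# torch.compile only exists from the 2.0 release onward, so its presence
# is a reasonable proxy for the new ``torch >= 2.0`` requirement.
if not hasattr(torch, "compile"):
    raise RuntimeError(f"torch >= 2.0 is required, found {torch.__version__}")
print(f"torch {torch.__version__} with torch.compile available")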
@@ -52,9 +52,6 @@
 
 import torch
 
-import torch._inductor.config
-torch._inductor.config.cpp.cxx = ("g++",)
-
 def foo(x, y):
     a = torch.sin(x)
     b = torch.cos(x)
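With the g++ override for Inductor removed, ``foo`` is compiled with the default toolchain. A minimal sketch of how the function is typically exercised, assuming the usual compile-then-compare pattern (the comparison code is ours, not quoted from the tutorial):

import torch

def foo(x, y):
    a = torch.sin(x)
    b = torch.cos(x)
    return a + b

# Compile with the default (TorchInductor) backend and compare against eager.
opt_foo = torch.compile(foo)
x, y = torch.randn(10, 10), torch.randn(10, 10)
print(torch.allclose(foo(x, y), opt_foo(x, y)))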
@@ -133,6 +130,11 @@ def evaluate(mod, inp):
     return mod(inp)
 
 model = init_model()
+
+# Reset since we are using a different mode.
+import torch._dynamo
+torch._dynamo.reset()
+
 evaluate_opt = torch.compile(evaluate, mode="reduce-overhead")
 
 inp = generate_data(16)[0]
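The added ``torch._dynamo.reset()`` clears Dynamo's compilation cache so that recompiling the same code under a different mode does not reuse previously compiled entries. The pattern, sketched with a stand-in model since ``init_model`` and ``generate_data`` are defined elsewhere in the tutorial:

import torch
import torch._dynamo

model = torch.nn.Linear(64, 64)  # stand-in for the tutorial's init_model()

def evaluate(mod, inp):
    return mod(inp)

# Clear any cached compilations before switching to a new mode.
torch._dynamo.reset()
evaluate_opt = torch.compile(evaluate, mode="reduce-overhead")

inp = torch.randn(16, 64)  # stand-in for generate_data(16)[0]
evaluate_opt(model, inp)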
@@ -174,8 +176,7 @@ def evaluate(mod, inp):
 
 ######################################################################
 # And indeed, we can see that running our model with ``torch.compile``
-# results in a significant speedup. On an NVIDIA A100 GPU, we observe a
-# 2.3x speedup. Speedup mainly comes from reducing Python overhead and
+# results in a significant speedup. Speedup mainly comes from reducing Python overhead and
 # GPU read/writes, and so the observed speedup may vary on factors such as model
 # architecture and batch size. For example, if a model's architecture is simple
 # and the amount of data is large, then the bottleneck would be
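Since the hard-coded A100 numbers were removed, readers are expected to measure the speedup on their own hardware. A rough timing helper in the spirit of what the tutorial uses, hedged as a sketch rather than the tutorial's exact ``timed`` function:

import time
import torch

def timed(fn):
    # Synchronize before and after so asynchronous GPU work is fully counted.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    start = time.perf_counter()
    result = fn()
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return result, time.perf_counter() - start

# Usage with the names from the surrounding tutorial code:
#   _, eager_time = timed(lambda: evaluate(model, inp))
#   _, compile_time = timed(lambda: evaluate_opt(model, inp))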
@@ -231,9 +232,8 @@ def train(mod, data):
 
 ######################################################################
 # Again, we can see that ``torch.compile`` takes longer in the first
-# iteration, as it must compile the model, but afterward, we see
-# significant speedups compared to eager. On an NVIDIA A100 GPU, we
-# observe a 2.2x speedup.
+# iteration, as it must compile the model, but in subsequent iterations, we see
+# significant speedups compared to eager.
 
 ######################################################################
 # Comparison to TorchScript and FX Tracing
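The same first-iteration compilation cost shows up in training; one way to see it is to time each call of a compiled train step. A self-contained sketch with a toy model and optimizer standing in for the tutorial's (all names below are placeholders):

import time
import torch

model = torch.nn.Linear(64, 64)
opt = torch.optim.Adam(model.parameters())

def train(mod, data):
    opt.zero_grad(True)
    loss = mod(data).sum()
    loss.backward()
    opt.step()

train_opt = torch.compile(train)
data = torch.randn(16, 64)

# Iteration 0 pays the compilation cost; later iterations reuse the compiled code.
for i in range(5):
    start = time.perf_counter()
    train_opt(model, data)
    print(f"iteration {i}: {time.perf_counter() - start:.4f}s")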
@@ -297,6 +297,9 @@ def test_fns(fn1, fn2, args):
 # Now we can see that ``torch.compile`` correctly handles
 # data-dependent control flow.
 
+# Reset since we are using a different mode.
+torch._dynamo.reset()
+
 compile_f1 = torch.compile(f1)
 print("compile 1, 1:", test_fns(f1, compile_f1, (inp1, inp2)))
 print("compile 1, 2:", test_fns(f1, compile_f1, (-inp1, inp2)))
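For context, ``f1`` branches on a runtime tensor value, which is exactly the data-dependent control flow that tracing-based approaches struggle with. A hedged sketch of such a function and the reset-then-compile pattern shown in the hunk (this ``f1`` is an illustration, not a verbatim quote of the tutorial's definition):

import torch
import torch._dynamo

def f1(x, y):
    # The branch depends on the values inside ``x``, not just its shape.
    if x.sum() < 0:
        return x + y
    return x - y

# Reset since we are using a different mode than in earlier sections.
torch._dynamo.reset()
compile_f1 = torch.compile(f1)

inp1, inp2 = torch.randn(4), torch.randn(4)
print("compile 1, 1:", torch.allclose(compile_f1(inp1, inp2), f1(inp1, inp2)))
print("compile 1, 2:", torch.allclose(compile_f1(-inp1, inp2), f1(-inp1, inp2)))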
@@ -394,7 +397,6 @@ def custom_backend(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor])
     gm.graph.print_tabular()
     return gm.forward
 
-import torch._dynamo
 # Reset since we are using a different backend.
 torch._dynamo.reset()
 
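To put the backend hunk in context: ``custom_backend`` is a debugging hook that prints the FX graph Dynamo captured and returns the unmodified forward, so no optimization is actually applied. A self-contained sketch of how such a backend is wired up with ``torch.compile`` (the example function at the end is ours):

from typing import List
import torch
import torch._dynamo

def custom_backend(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]):
    print("custom backend called with FX graph:")
    gm.graph.print_tabular()
    # Returning the original forward means the captured graph runs unchanged.
    return gm.forward

# Reset since we are using a different backend.
torch._dynamo.reset()

def fn(x):
    return torch.relu(x) + 1

opt_fn = torch.compile(fn, backend=custom_backend)
opt_fn(torch.randn(4))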
@@ -489,4 +491,4 @@ def bar(a, b):
 # In this tutorial, we introduced ``torch.compile`` by covering
 # basic usage, demonstrating speedups over eager mode, comparing to previous
 # PyTorch compiler solutions, and briefly investigating TorchDynamo and its interactions
-# with FX graphs. We hope that you will give ``torch.compile`` a try!
+# with FX graphs. We hope that you will give ``torch.compile`` a try!
