
Commit ed3f8a5

torch.compile tutorial update for pt2 stable release
1 parent 1f8c325 commit ed3f8a5

File tree

3 files changed: +11 −514 lines


.jenkins/validate_tutorials_built.py

Lines changed: 0 additions & 1 deletion
@@ -39,7 +39,6 @@
     "recipes/profiler_recipe",
     "recipes/save_load_across_devices",
     "recipes/warmstarting_model_using_parameters_from_a_different_model",
-    "torch_compile_tutorial_",
     "recipes/dynamic_quantization",
     "recipes/saving_and_loading_a_general_checkpoint",
     "recipes/benchmark",

intermediate_source/torch_compile_tutorial_.py renamed to intermediate_source/torch_compile_tutorial.py

Lines changed: 11 additions & 7 deletions
@@ -28,7 +28,7 @@
 #
 # **Required pip Dependencies**
 #
-# - ``torch >= 1.14``
+# - ``torch >= 2.0``
 # - ``torchvision``
 # - ``numpy``
 # - ``scipy``
@@ -52,9 +52,6 @@
 
 import torch
 
-import torch._inductor.config
-torch._inductor.config.cpp.cxx = ("g++",)
-
 def foo(x, y):
     a = torch.sin(x)
     b = torch.cos(x)
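For context outside the diff: with the g++ Inductor workaround removed, compiling ``foo`` needs nothing beyond a stock torch >= 2.0 install. A minimal sketch (the return line is assumed from the surrounding tutorial, not shown in this hunk):

    import torch

    def foo(x, y):
        a = torch.sin(x)
        b = torch.cos(x)
        return a + b

    opt_foo = torch.compile(foo)
    print(opt_foo(torch.randn(10, 10), torch.randn(10, 10)))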
@@ -133,6 +130,11 @@ def evaluate(mod, inp):
     return mod(inp)
 
 model = init_model()
+
+# Reset since we are using a different mode.
+import torch._dynamo
+torch._dynamo.reset()
+
 evaluate_opt = torch.compile(evaluate, mode="reduce-overhead")
 
 inp = generate_data(16)[0]
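A minimal sketch of why the reset is inserted here: Dynamo caches compiled artifacts per function, so recompiling the same code under a different mode should be preceded by ``torch._dynamo.reset()``. The ``step`` function below is illustrative, not from the tutorial:

    import torch
    import torch._dynamo

    def step(x):
        return torch.relu(x) * 2

    compiled_default = torch.compile(step)  # default mode
    compiled_default(torch.randn(8))

    torch._dynamo.reset()  # clear cached compilation state before changing mode
    compiled_overhead = torch.compile(step, mode="reduce-overhead")
    compiled_overhead(torch.randn(8))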
@@ -175,7 +177,7 @@ def evaluate(mod, inp):
 ######################################################################
 # And indeed, we can see that running our model with ``torch.compile``
 # results in a significant speedup. On an NVIDIA A100 GPU, we observe a
-# 2.3x speedup. Speedup mainly comes from reducing Python overhead and
+# ~1.5x speedup. Speedup mainly comes from reducing Python overhead and
 # GPU read/writes, and so the observed speedup may vary on factors such as model
 # architecture and batch size. For example, if a model's architecture is simple
 # and the amount of data is large, then the bottleneck would be
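Since the quoted speedup depends on hardware, model, and batch size, one way to check the claim locally is to time eager vs. compiled execution. A hedged sketch; the ``timed`` helper and the ``nn.Linear`` stand-in are illustrative, not the tutorial's ``init_model()``:

    import time
    import torch

    def timed(fn, *args):
        # Synchronize around the call so GPU kernels count toward wall time.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        start = time.perf_counter()
        result = fn(*args)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        return result, time.perf_counter() - start

    mod = torch.nn.Linear(1024, 1024).eval()
    inp = torch.randn(64, 1024)

    compiled = torch.compile(mod)
    compiled(inp)  # warm-up: the first call pays the compilation cost

    _, eager_time = timed(mod, inp)
    _, compiled_time = timed(compiled, inp)
    print(f"eager: {eager_time:.5f}s, compiled: {compiled_time:.5f}s")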
@@ -233,7 +235,7 @@ def train(mod, data):
 # Again, we can see that ``torch.compile`` takes longer in the first
 # iteration, as it must compile the model, but afterward, we see
 # significant speedups compared to eager. On an NVIDIA A100 GPU, we
-# observe a 2.2x speedup.
+# observe a ~1.8x speedup.
 
 ######################################################################
 # Comparison to TorchScript and FX Tracing
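A hedged sketch of the training-side comparison the prose describes; the model, optimizer, and loss below are stand-ins rather than the tutorial's actual ``train`` setup. Compiling the whole step captures the forward pass, loss, and backward pass in one compiled region:

    import torch

    mod = torch.nn.Linear(16, 1)
    opt = torch.optim.SGD(mod.parameters(), lr=0.01)

    def train(mod, data):
        opt.zero_grad(set_to_none=True)
        pred = mod(data[0])
        loss = torch.nn.functional.mse_loss(pred, data[1])
        loss.backward()
        opt.step()
        return loss

    train_opt = torch.compile(train)
    data = (torch.randn(32, 16), torch.randn(32, 1))
    print(train_opt(mod, data).item())  # first call compiles; later calls are fast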
@@ -297,6 +299,9 @@ def test_fns(fn1, fn2, args):
 # Now we can see that ``torch.compile`` correctly handles
 # data-dependent control flow.
 
+# Reset since we are using a different mode.
+torch._dynamo.reset()
+
 compile_f1 = torch.compile(f1)
 print("compile 1, 1:", test_fns(f1, compile_f1, (inp1, inp2)))
 print("compile 1, 2:", test_fns(f1, compile_f1, (-inp1, inp2)))
@@ -394,7 +399,6 @@ def custom_backend(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor])
     gm.graph.print_tabular()
     return gm.forward
 
-import torch._dynamo
 # Reset since we are using a different backend.
 torch._dynamo.reset()
 
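To make the hunk above self-contained, here is a sketch of how a debugging backend like this one plugs into ``torch.compile``; the function being compiled is illustrative:

    from typing import List
    import torch
    import torch._dynamo

    def custom_backend(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]):
        print("custom backend called with FX graph:")
        gm.graph.print_tabular()
        return gm.forward  # run the captured graph unmodified

    torch._dynamo.reset()  # reset since we are using a different backend
    opt_fn = torch.compile(lambda x: torch.sin(x) + 1, backend=custom_backend)
    opt_fn(torch.randn(3))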