Skip to content

Commit c449b4e

Browse files
committed
Update profiler_recipe.py to unify the accelerator Python code
one unified code path for both CUDA and XPU
1 parent 7fa0b6e commit c449b4e

File tree

1 file changed

+31
-33
lines changed

1 file changed

+31
-33
lines changed

recipes_source/recipes/profiler_recipe.py

Lines changed: 31 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -161,17 +161,26 @@
161161
# Note the occurrence of ``aten::convolution`` twice with different input shapes.
162162

163163
######################################################################
164-
# Profiler can also be used to analyze performance of models executed on GPUs:
164+
# Profiler can also be used to analyze performance of models executed on GPUs and XPUs:
165+
# Users can switch among cpu, cuda, and xpu
166+
device = 'cuda'
165167

166-
model = models.resnet18().cuda()
167-
inputs = torch.randn(5, 3, 224, 224).cuda()
168+
activities = [ProfilerActivity.CPU]
169+
sort_by_keyword = device + "_time_total"
168170

169-
with profile(activities=[
170-
ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True) as prof:
171+
if device == 'cuda':
172+
activities.append(ProfilerActivity.CUDA)
173+
elif device == 'xpu':
174+
activities.append(ProfilerActivity.XPU)
175+
176+
model = models.resnet18().to(device)
177+
inputs = torch.randn(5, 3, 224, 224).to(device)
178+
179+
with profile(activities=activities, record_shapes=True) as prof:
171180
with record_function("model_inference"):
172181
model(inputs)
173182

174-
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))
183+
print(prof.key_averages().table(sort_by=sort_by_keyword, row_limit=10))
175184

176185
######################################################################
177186
# (Note: the first use of CUDA profiling may bring an extra overhead.)
@@ -199,17 +208,7 @@
199208
# Self CUDA time total: 11.666ms
200209
#
201210
######################################################################
202-
# Profiler can also be used to analyze performance of models executed on XPUs:
203-
204-
model = models.resnet18().xpu()
205-
inputs = torch.randn(5, 3, 224, 224).xpu()
206-
207-
with profile(activities=[
208-
ProfilerActivity.CPU, ProfilerActivity.XPU], record_shapes=True) as prof:
209-
with record_function("model_inference"):
210-
model(inputs)
211211

212-
print(prof.key_averages().table(sort_by="xpu_time_total", row_limit=10))
213212

214213
######################################################################
215214
# (Note: the first use of XPU profiling may bring an extra overhead.)
@@ -307,12 +306,20 @@
307306
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
308307
#
309308
# Profiling results can be outputted as a ``.json`` trace file:
310-
# Tracing CUDA kernels
309+
# Tracing CUDA or XPU kernels
310+
# Users can switch among cpu, cuda, and xpu
311+
device = 'cuda'
311312

312-
model = models.resnet18().cuda()
313-
inputs = torch.randn(5, 3, 224, 224).cuda()
313+
activities = [ProfilerActivity.CPU]
314+
if device == 'cuda':
315+
activities.append(ProfilerActivity.CUDA)
316+
elif device == 'xpu':
317+
activities.append(ProfilerActivity.XPU)
314318

315-
with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
319+
model = models.resnet18().to(device)
320+
inputs = torch.randn(5, 3, 224, 224).to(device)
321+
322+
with profile(activities=activities) as prof:
316323
model(inputs)
317324

318325
prof.export_chrome_trace("trace.json")
@@ -324,18 +331,8 @@
324331
# .. image:: ../../_static/img/trace_img.png
325332
# :scale: 25 %
326333

327-
# Tracing XPU kernels
328-
329-
model = models.resnet18().xpu()
330-
inputs = torch.randn(5, 3, 224, 224).xpu()
331-
332-
with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.XPU]) as prof:
333-
model(inputs)
334-
335-
prof.export_chrome_trace("trace.json")
336-
337334
######################################################################
338-
# You can examine the sequence of profiled operators and CUDA kernels
335+
# You can examine the sequence of profiled operators and CUDA or XPU kernels
339336
# in Chrome trace viewer (``chrome://tracing``):
340337
#
341338
# .. image:: ../../_static/img/trace_xpu_img.png
@@ -347,15 +344,16 @@
347344
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
348345
#
349346
# Profiler can be used to analyze Python and TorchScript stack traces:
347+
sort_by_keyword = "self_" + device + "_time_total"
350348

351349
with profile(
352-
activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
350+
activities=activities,
353351
with_stack=True,
354352
) as prof:
355353
model(inputs)
356354

357355
# Print aggregated stats
358-
print(prof.key_averages(group_by_stack_n=5).table(sort_by="self_cuda_time_total", row_limit=2))
356+
print(prof.key_averages(group_by_stack_n=5).table(sort_by=sort_by_keyword, row_limit=2))
359357

360358
#################################################################################
361359
# The output might look like this (omitting some columns):

0 commit comments

Comments
 (0)