1 file changed: +4 -21 lines changed
Original file line number | Diff line number | Diff line change
442
442
# To signal to the profiler that the next step has started, call the ``prof.step()`` function.
443
443
# The current profiler step is stored in ``prof.step_num``.
444
444
#
445
- # The following example shows how to use all of the concepts above for CUDA Kernels:
445
+ # The following example shows how to use all of the concepts above for CUDA and XPU Kernels:
446
446
447
- def trace_handler (p ):
448
- output = p .key_averages ().table (sort_by = "self_cuda_time_total" , row_limit = 10 )
449
- print (output )
450
- p .export_chrome_trace ("/tmp/trace_" + str (p .step_num ) + ".json" )
451
-
452
- with profile (
453
- activities = [ProfilerActivity .CPU , ProfilerActivity .CUDA ],
454
- schedule = torch .profiler .schedule (
455
- wait = 1 ,
456
- warmup = 1 ,
457
- active = 2 ),
458
- on_trace_ready = trace_handler
459
- ) as p :
460
- for idx in range (8 ):
461
- model (inputs )
462
- p .step ()
463
-
464
- # The following example shows how to use all of the concepts above for XPU Kernels:
447
+ sort_by_keyword = "self_" + device + "_time_total"
465
448
466
449
def trace_handler (p ):
467
- output = p .key_averages ().table (sort_by = "self_xpu_time_total" , row_limit = 10 )
450
+ output = p .key_averages ().table (sort_by = sort_by_keyword , row_limit = 10 )
468
451
print (output )
469
452
p .export_chrome_trace ("/tmp/trace_" + str (p .step_num ) + ".json" )
470
453
471
454
with profile (
472
- activities = [ ProfilerActivity . CPU , ProfilerActivity . XPU ] ,
455
+ activities = activities ,
473
456
schedule = torch .profiler .schedule (
474
457
wait = 1 ,
475
458
warmup = 1 ,
You can’t perform that action at this time.
0 commit comments