
Commit 60cee89

Author: anupamb (committed)

HTA tutorial
1 parent b58b8a5 commit 60cee89

File tree: 3 files changed (+77, -5 lines)


beginner_source/hta_intro.py

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
"""
Introduction to Holistic Trace Analysis
---------------------------------------
**Author:** `Anupam Bhatnagar <https://github.com/anupambhatnagar>`_

.. note::
   Visualizations have been set to False to keep the notebook size small. When
   running the notebook locally, set the visualize variable to True to display
   the plots.

"""

##############################################################
# Setup and loading traces
# ~~~~~~~~~~~~~~~~~~~~~~~~
#
# In this demo we analyze the traces from a distributed training job that used 8 GPUs. To run the code on your laptop:
#
# 1) Install Holistic Trace Analysis via pip: ``pip install HolisticTraceAnalysis``
# 2) [Optional and recommended] Set up a conda environment. See here for details.
# 3) Edit the ``hta_install_dir`` variable below to point to the folder of your local ``HolisticTraceAnalysis`` installation.

from hta.trace_analysis import TraceAnalysis
hta_install_dir = "/path/to/HolisticTraceAnalysis"
trace_dir = hta_install_dir + "/tests/data/vision_transformer/"
analyzer = TraceAnalysis(trace_dir=trace_dir)


##############################################################
# Temporal Breakdown
# ~~~~~~~~~~~~~~~~~~
#
# The temporal breakdown feature gives a breakdown of the time spent by the GPU as follows:
#
# 1) Idle time - GPU idle
# 2) Compute time - GPU busy with computation events
# 3) Non-compute time - GPU busy with communication or memory events

time_spent_df = analyzer.get_temporal_breakdown(visualize=False)
print(time_spent_df)


##############################################################
# Kernel Breakdown
# ~~~~~~~~~~~~~~~~
#
# This feature computes the following:
#
# 1) Breakdown of time spent among kernel types (Computation, Communication, Memory) across all ranks.
# 2) Kernels taking the most time on each rank, by kernel type.
# 3) Distribution of average time across ranks for the kernels taking the most time.
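
The diff for beginner_source/hta_intro.py ends with the kernel breakdown description above, without showing a corresponding call. Below is a minimal sketch of how that feature might be invoked, assuming the ``get_gpu_kernel_breakdown`` method from the HolisticTraceAnalysis API and the ``analyzer`` object constructed in the setup section; the keyword argument and the return shape shown are assumptions, not part of this commit.

# Minimal sketch (not part of this commit): invoking the kernel breakdown,
# assuming the get_gpu_kernel_breakdown method from HolisticTraceAnalysis.
# The return value is assumed here to be a pair of dataframes: time spent per
# kernel type across all ranks, and per-kernel metrics for the top kernels on each rank.
kernel_type_df, kernel_df = analyzer.get_gpu_kernel_breakdown(visualize=False)
print(kernel_type_df)
print(kernel_df)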

index.rst

Lines changed: 20 additions & 0 deletions
@@ -378,6 +378,17 @@ What's new in PyTorch tutorials?
   :link: advanced/super_resolution_with_onnxruntime.html
   :tags: Production,ONNX

+.. customcarditem::
+   :header: Profiling PyTorch
+   :card_description: Learn how to profile a PyTorch application
+   :link: beginner/profiler.html
+   :tags: Profiling
+
+.. customcarditem::
+   :header: Profiling PyTorch
+   :card_description: Introduction to Holistic Trace Analysis
+   :link: beginner/hta_intro.html
+   :tags: Profiling

.. Code Transformations with FX
@@ -993,6 +1004,15 @@ Additional Resources
   advanced/super_resolution_with_onnxruntime
   intermediate/realtime_rpi

+.. toctree::
+   :maxdepth: 2
+   :includehidden:
+   :hidden:
+   :caption: Profiling PyTorch
+
+   beginner/profiler
+   beginner/hta_intro
+
.. toctree::
   :maxdepth: 2
   :includehidden:

requirements.txt

Lines changed: 5 additions & 5 deletions
@@ -20,7 +20,7 @@ bs4
awscliv2==2.1.1
flask
spacy==3.4.1
-ray[tune]==2.4.0
+#ray[tune]==2.4.0
tensorboard
jinja2==3.0.3
pytorch-lightning
@@ -32,9 +32,9 @@ nbformat>=4.2.0
datasets
transformers
torchmultimodal-nightly # needs to be updated to stable as soon as it's available
-onnx
-onnxscript
-onnxruntime
+#onnx
+#onnxscript
+#onnxruntime

importlib-metadata==6.8.0
@@ -58,6 +58,6 @@ pyopengl
gymnasium[mujoco]==0.27.0
timm
iopath
-pygame==2.1.2
+#pygame==2.1.2
pycocotools
semilearn==0.3.2
