Commit 17d27d4

Merge branch 'main' into fix_typo

2 parents ccb5a1f + 109d464


58 files changed: +2206, -823 lines

.jenkins/build.sh

Lines changed: 4 additions & 4 deletions
@@ -24,10 +24,10 @@ pip install --progress-bar off -r $DIR/../requirements.txt
 
 #Install PyTorch Nightly for test.
 # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
-# Install 2.1 for testing
-# pip uninstall -y torch torchvision torchaudio torchtext torchdata
-# pip3 install torch torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu121
-# pip3 install torchdata torchtext --index-url https://download.pytorch.org/whl/test/cpu
+# Install 2.2 for testing
+pip uninstall -y torch torchvision torchaudio torchtext torchdata
+pip3 install torch==2.2.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu121
+pip3 install torchdata torchtext --index-url https://download.pytorch.org/whl/test/cpu
 
 # Install two language tokenizers for Translation with TorchText tutorial
 python -m spacy download en_core_web_sm
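
To replay this step locally, a quick sanity check (not part of the commit) that the test-channel wheels were picked up could look like this in Python:

    # confirm the pinned 2.2 test build is what actually got installed
    import torch
    import torchvision

    print(torch.__version__)       # expected to start with "2.2.0"
    print(torch.version.cuda)      # expected "12.1" for the cu121 wheels
    print(torchvision.__version__)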

.jenkins/validate_tutorials_built.py

Lines changed: 1 addition & 0 deletions
@@ -29,6 +29,7 @@
     "intermediate_source/_torch_export_nightly_tutorial", # does not work on release
     "advanced_source/super_resolution_with_onnxruntime",
     "advanced_source/ddp_pipeline", # requires 4 gpus
+    "advanced_source/usb_semisup_learn", # in the current form takes 140+ minutes to build - can be enabled when the build time is reduced
     "prototype_source/fx_graph_mode_ptq_dynamic",
     "prototype_source/vmap_recipe",
     "prototype_source/torchscript_freezing",

.pyspelling.yml

Lines changed: 4 additions & 1 deletion
@@ -45,6 +45,9 @@ matrix:
     - open: '\.\. (code-block|math)::.*$\n*'
       content: '(?P<first>(^(?P<indent>[ ]+).*$\n))(?P<other>(^([ \t]+.*|[ \t]*)$\n)*)'
       close: '(^(?![ \t]+.*$))'
+    # Ignore references like "[1] Author: Title"
+    - open: '\[\d\]'
+      close: '\n'
     - pyspelling.filters.markdown:
     - pyspelling.filters.html:
       ignores:
@@ -97,7 +100,7 @@ matrix:
       content: '''''*'
       close: '$'
     # Ignore reStructuredText block directives
-    - open: '\.\. (code-block|math)::.*$\n*'
+    - open: '\.\. (code-block|math|table)::.*$\n*'
       content: '(?P<first>(^(?P<indent>[ ]+).*$\n))(?P<other>(^([ \t]+.*|[ \t]*)$\n)*)'
       close: '(^(?![ \t]+.*$))'
     - open: '\.\. (raw)::.*$\n*'
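
A quick illustration (not part of the commit) of what the new bracketed-reference pattern matches, for example the reference lines added in the USB tutorial below:

    import re

    # pyspelling ignores everything from the `open` match up to the `close`
    # match, here the next newline
    line = "[1] USB: https://github.com/microsoft/Semi-supervised-learning"
    assert re.search(r'\[\d\]', line) is not None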

Makefile

Lines changed: 5 additions & 0 deletions
@@ -86,6 +86,9 @@ download:
 	wget -nv -N https://www.manythings.org/anki/deu-eng.zip -P $(DATADIR)
 	unzip -o $(DATADIR)/deu-eng.zip -d beginner_source/data/
 
+	# Download PennFudanPed dataset for intermediate_source/torchvision_tutorial.py
+	wget https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip -P $(DATADIR)
+	unzip -o $(DATADIR)/PennFudanPed.zip -d intermediate_source/data/
 
 docs:
 	make download
@@ -103,3 +106,5 @@ html-noplot:
 clean-cache:
 	make clean
 	rm -rf advanced beginner intermediate recipes
+	# remove additional python files downloaded for torchvision_tutorial.py
+	rm -rf intermediate_source/engine.py intermediate_source/utils.py intermediate_source/transforms.py intermediate_source/coco_eval.py intermediate_source/coco_utils.py

README.md

Lines changed: 3 additions & 3 deletions

_static/css/custom2.css

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+/* Survey banner .css */
+
+.survey-banner {
+  margin-top: 10px;
+  background-color: #f3f4f7;
+  padding-top: 15px;
+  padding-left: 10px;
+  padding-bottom: 1px;
+}
+
+@media screen and (max-width: 600px) {
+  .survey-banner {
+    padding-top: 5px;
+    padding-left: 5px;
+    padding-bottom: -1px;
+    font-size: 12px;
+    margin-bottom: 5px;
+  }
+}

_static/img/hta/comm_across_ranks.png (46.2 KB)

_static/img/hta/counts_diff.png (54.1 KB)

_static/img/hta/duration_diff.png (80.1 KB)

_static/img/hta/idle_time.png (42.4 KB)

_static/img/hta/idle_time_summary.png (76.2 KB)

_static/img/hta/kernel_metrics_df.png (84.3 KB)

_static/img/hta/overlap_df.png (36.3 KB)

_static/img/hta/overlap_plot.png (44.9 KB)

_static/img/hta/pie_charts.png (76.4 KB)

_static/img/hta/runtime_outliers.png (31.9 KB)

_static/img/hta/short_gpu_kernels.png (38.3 KB)

_static/tiatoolbox_tutorial.ipynb

Lines changed: 1 addition & 0 deletions

_templates/layout.html

Lines changed: 13 additions & 2 deletions
@@ -1,6 +1,5 @@
 {% extends "!layout.html" %}
 
-
 {%- block content %}
 {{ super() }}
 <script>
@@ -17,7 +16,9 @@
 {{ super() }}
 <script>
 
-
+// Helper function to make it easier to call dataLayer.push()
+function gtag(){window.dataLayer.push(arguments);}
+
 //add microsoft link
 
 if(window.location.href.indexOf("/beginner/basics/")!= -1)
@@ -111,4 +112,14 @@
 </script>
 
 <img height="1" width="1" style="border-style:none;" alt="" src="https://www.googleadservices.com/pagead/conversion/795629140/?label=txkmCPmdtosBENSssfsC&amp;guid=ON&amp;script=0"/>
+
+//temporarily add a link to survey
+<script>
+var survey = '<div class="survey-banner"><p><i class="fas fa-poll" aria-hidden="true">&nbsp </i> Take the annual <a href="https://forms.gle/jdNexNU6eZ8mCGDY7">PyTorch Tutorials survey</a>.</p></div>'
+if ($(".pytorch-call-to-action-links").length) {
+  $(".pytorch-call-to-action-links").before(survey);
+} else {
+  $("#pytorch-article").prepend(survey);
+}
+</script>
 {% endblock %}

advanced_source/static_quantization_tutorial.rst

Lines changed: 3 additions & 0 deletions

advanced_source/usb_semisup_learn.py

Lines changed: 246 additions & 0 deletions
@@ -0,0 +1,246 @@
"""
Semi-Supervised Learning using USB built upon PyTorch
=====================================================

**Author**: `Hao Chen <https://github.com/Hhhhhhao>`_

Unified Semi-supervised learning Benchmark (USB) is a semi-supervised
learning framework built upon PyTorch.
Based on Datasets and Modules provided by PyTorch, USB becomes a flexible,
modular, and easy-to-use framework for semi-supervised learning.
It supports a variety of semi-supervised learning algorithms, including
``FixMatch``, ``FreeMatch``, ``DeFixMatch``, ``SoftMatch``, and so on.
It also supports a variety of imbalanced semi-supervised learning algorithms.
The benchmark results across different datasets of computer vision, natural
language processing, and speech processing are included in USB.

This tutorial will walk you through the basics of using the USB lightning
package.
Let's get started by training a ``FreeMatch``/``SoftMatch`` model on
CIFAR-10 using a pretrained ViT!
We will also show how easy it is to change the semi-supervised algorithm and
train on imbalanced datasets.


.. figure:: /_static/img/usb_semisup_learn/code.png
   :alt: USB framework illustration
"""

######################################################################
# Introduction to ``FreeMatch`` and ``SoftMatch`` in Semi-Supervised Learning
# ----------------------------------------------------------------------------
#
# Here we provide a brief introduction to ``FreeMatch`` and ``SoftMatch``.
# First, we introduce a famous baseline for semi-supervised learning called
# ``FixMatch``.
# ``FixMatch`` is a very simple framework for semi-supervised learning, where it
# utilizes strong augmentation to generate pseudo-labels for unlabeled data.
# It adopts a confidence thresholding strategy to filter out low-confidence
# pseudo-labels, using a fixed threshold.
# ``FreeMatch`` and ``SoftMatch`` are two algorithms that improve upon ``FixMatch``.
# ``FreeMatch`` proposes an adaptive thresholding strategy to replace the fixed
# thresholding strategy in ``FixMatch``. The adaptive thresholding progressively
# increases the threshold according to the learning status of the model on each
# class. ``SoftMatch`` absorbs the idea of confidence thresholding as a
# weighting mechanism. It proposes a Gaussian weighting mechanism to overcome
# the quantity-quality trade-off in pseudo-labels. In this tutorial, we will
# use USB to train ``FreeMatch`` and ``SoftMatch``.
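
######################################################################
# To make the contrast concrete, here is a minimal sketch (not part of the
# USB API; the tensor shapes, the 0.95 threshold, and the per-class values
# are illustrative assumptions) of how the three schemes turn pseudo-label
# confidences into a loss mask or weight:
#
# .. code-block:: python
#
#    import torch
#
#    probs = torch.rand(8, 10).softmax(dim=-1)   # mock predictions on unlabeled data
#    conf, pseudo_labels = probs.max(dim=-1)
#
#    # FixMatch: hard mask from a single fixed threshold
#    fixmatch_mask = (conf >= 0.95).float()
#
#    # FreeMatch (simplified): per-class thresholds, which the real algorithm
#    # EMA-updates from the model's learning status
#    class_thresholds = torch.full((10,), 0.7)
#    freematch_mask = (conf >= class_thresholds[pseudo_labels]).float()
#
#    # SoftMatch (simplified): a soft, truncated-Gaussian weight per sample
#    mu, sigma = conf.mean(), conf.std()          # also EMA-updated in practice
#    softmatch_weight = torch.where(
#        conf >= mu,
#        torch.ones_like(conf),
#        torch.exp(-((conf - mu) ** 2) / (2 * sigma ** 2)),
#    )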
######################################################################
# Use USB to Train ``FreeMatch``/``SoftMatch`` on CIFAR-10 with only 40 labels
# -----------------------------------------------------------------------------
#
# USB is easy to use and extend, affordable to small groups, and comprehensive
# for developing and evaluating SSL algorithms.
# USB provides the implementation of 14 SSL algorithms based on consistency
# regularization, and 15 tasks for evaluation from the CV, NLP, and audio
# domains.
# It has a modular design that allows users to easily extend the package by
# adding new algorithms and tasks.
# It also supports a Python API for easier adaptation to different SSL
# algorithms on new data.
#
# Now, let's use USB to train ``FreeMatch`` and ``SoftMatch`` on CIFAR-10.
# First, we need to install the USB package ``semilearn`` and import the
# necessary API functions from USB.
# Below is a list of the functions we will use from ``semilearn``:
#
# - ``get_dataset`` to load the dataset; here we use CIFAR-10
# - ``get_data_loader`` to create the train (labeled and unlabeled) and test
#   data loaders; the unlabeled train loaders provide both strong and weak
#   augmentations of the unlabeled data
# - ``get_net_builder`` to create a model; here we use a pretrained ViT
# - ``get_algorithm`` to create the semi-supervised learning algorithm;
#   here we use ``FreeMatch`` and ``SoftMatch``
# - ``get_config`` to get the default configuration of the algorithm
# - ``Trainer``, a Trainer class for training and evaluating the
#   algorithm on the dataset
#
import semilearn
from semilearn import get_dataset, get_data_loader, get_net_builder, get_algorithm, get_config, Trainer
######################################################################
# After importing the necessary functions, we first set the hyper-parameters
# of the algorithm.
#
config = {
    'algorithm': 'freematch',
    'net': 'vit_tiny_patch2_32',
    'use_pretrain': True,
    'pretrain_path': 'https://github.com/microsoft/Semi-supervised-learning/releases/download/v.0.0.0/vit_tiny_patch2_32_mlp_im_1k_32.pth',

    # optimization configs
    'epoch': 1,
    'num_train_iter': 4000,
    'num_eval_iter': 500,
    'num_log_iter': 50,
    'optim': 'AdamW',
    'lr': 5e-4,
    'layer_decay': 0.5,
    'batch_size': 16,
    'eval_batch_size': 16,

    # dataset configs
    'dataset': 'cifar10',
    'num_labels': 40,
    'num_classes': 10,
    'img_size': 32,
    'crop_ratio': 0.875,
    'data_dir': './data',
    'ulb_samples_per_class': None,

    # algorithm specific configs
    'hard_label': True,
    'T': 0.5,
    'ema_p': 0.999,
    'ent_loss_ratio': 0.001,
    'uratio': 2,
    'ulb_loss_ratio': 1.0,

    # device configs
    'gpu': 0,
    'world_size': 1,
    'distributed': False,
    'num_workers': 4,
}
config = get_config(config)
######################################################################
# Then we load the dataset, create the data loaders for training and testing,
# and specify the model and algorithm to use.
#
dataset_dict = get_dataset(config, config.algorithm, config.dataset, config.num_labels, config.num_classes, data_dir=config.data_dir, include_lb_to_ulb=config.include_lb_to_ulb)
train_lb_loader = get_data_loader(config, dataset_dict['train_lb'], config.batch_size)
train_ulb_loader = get_data_loader(config, dataset_dict['train_ulb'], int(config.batch_size * config.uratio))
eval_loader = get_data_loader(config, dataset_dict['eval'], config.eval_batch_size)
algorithm = get_algorithm(config, get_net_builder(config.net, from_name=False), tb_log=None, logger=None)
######################################################################
# We can now start training the algorithms on CIFAR-10 with 40 labels.
# We train for 4000 iterations and evaluate every 500 iterations.
#
trainer = Trainer(config, algorithm)
trainer.fit(train_lb_loader, train_ulb_loader, eval_loader)
######################################################################
# Finally, let's evaluate the trained model on the validation set.
# After training for 4000 iterations with ``FreeMatch`` on only 40 labels of
# CIFAR-10, we obtain a classifier that achieves above 93% accuracy on the
# validation set.
trainer.evaluate(eval_loader)
######################################################################
# Use USB to Train ``SoftMatch`` with a Specific Imbalanced Algorithm on Imbalanced CIFAR-10
# -------------------------------------------------------------------------------------------
#
# Now let's say we have an imbalanced labeled set and an imbalanced unlabeled
# set of CIFAR-10, and we want to train a ``SoftMatch`` model on them.
# We create the imbalanced labeled set and the imbalanced unlabeled set of
# CIFAR-10 by setting ``lb_imb_ratio`` and ``ulb_imb_ratio`` to 10.
# Also, we replace the ``algorithm`` with ``softmatch`` and set ``imbalanced``
# to ``True``.
#
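######################################################################
# As a rough guide to what an imbalance ratio of 10 means for the
# configuration below (assuming the common exponential long-tail convention;
# the exact split is decided by ``semilearn`` internals), the head class gets
# roughly 10x the labels of the tail class:
#
# .. code-block:: python
#
#    import numpy as np
#
#    num_classes, ratio, num_labels = 10, 10, 1500
#    decay = ratio ** (-np.arange(num_classes) / (num_classes - 1))
#    counts = np.round(num_labels * decay / decay.sum()).astype(int)
#    # counts[0] (~367) is about 10x counts[-1] (~37), summing to ~1500
#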
config = {
    'algorithm': 'softmatch',
    'net': 'vit_tiny_patch2_32',
    'use_pretrain': True,
    'pretrain_path': 'https://github.com/microsoft/Semi-supervised-learning/releases/download/v.0.0.0/vit_tiny_patch2_32_mlp_im_1k_32.pth',

    # optimization configs
    'epoch': 1,
    'num_train_iter': 4000,
    'num_eval_iter': 500,
    'num_log_iter': 50,
    'optim': 'AdamW',
    'lr': 5e-4,
    'layer_decay': 0.5,
    'batch_size': 16,
    'eval_batch_size': 16,

    # dataset configs
    'dataset': 'cifar10',
    'num_labels': 1500,
    'num_classes': 10,
    'img_size': 32,
    'crop_ratio': 0.875,
    'data_dir': './data',
    'ulb_samples_per_class': None,
    'lb_imb_ratio': 10,
    'ulb_imb_ratio': 10,
    'ulb_num_labels': 3000,

    # algorithm specific configs
    'hard_label': True,
    'T': 0.5,
    'ema_p': 0.999,
    'ent_loss_ratio': 0.001,
    'uratio': 2,
    'ulb_loss_ratio': 1.0,

    # device configs
    'gpu': 0,
    'world_size': 1,
    'distributed': False,
    'num_workers': 4,
}
config = get_config(config)
######################################################################
# Then, we re-load the dataset, create the data loaders for training and
# testing, and specify the model and algorithm to use.
#
dataset_dict = get_dataset(config, config.algorithm, config.dataset, config.num_labels, config.num_classes, data_dir=config.data_dir, include_lb_to_ulb=config.include_lb_to_ulb)
train_lb_loader = get_data_loader(config, dataset_dict['train_lb'], config.batch_size)
train_ulb_loader = get_data_loader(config, dataset_dict['train_ulb'], int(config.batch_size * config.uratio))
eval_loader = get_data_loader(config, dataset_dict['eval'], config.eval_batch_size)
algorithm = get_algorithm(config, get_net_builder(config.net, from_name=False), tb_log=None, logger=None)
######################################################################
# We can now start training the algorithm on the imbalanced CIFAR-10.
# We train for 4000 iterations and evaluate every 500 iterations.
#
trainer = Trainer(config, algorithm)
trainer.fit(train_lb_loader, train_ulb_loader, eval_loader)
######################################################################
# Finally, let's evaluate the trained model on the validation set.
#
trainer.evaluate(eval_loader)
######################################################################
# References:
#
# [1] USB: https://github.com/microsoft/Semi-supervised-learning
# [2] Kihyuk Sohn et al. "FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence"
# [3] Yidong Wang et al. "FreeMatch: Self-adaptive Thresholding for Semi-supervised Learning"
# [4] Hao Chen et al. "SoftMatch: Addressing the Quantity-Quality Trade-off in Semi-supervised Learning"
