Commit 697fef1

Redirect audio tutorials to torchaudio
committed
1 parent 48c31c4 commit 697fef1

10 files changed: +43 -3661 lines changed

beginner_source/audio_data_augmentation_tutorial.py

Lines changed: 4 additions & 436 deletions
Large diffs are not rendered by default.
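The diff for this file is not rendered, but judging from the rendered diffs below, the commit replaces each tutorial body with a docstring-only redirect stub. A minimal sketch of that pattern follows; the title and target URL are illustrative, inferred from the other files in this commit rather than read from this diff:

"""
Audio Data Augmentation
=======================

This tutorial has been moved to https://pytorch.org/audio/stable/tutorials/audio_data_augmentation_tutorial.html

It will redirect in 3 seconds.

.. raw:: html

   <meta http-equiv="Refresh" content="3; url='https://pytorch.org/audio/stable/tutorials/audio_data_augmentation_tutorial.html'" />
"""
# Hypothetical stub for illustration only; the actual contents of this file's diff are not rendered above.

Keeping the redirect inside the module docstring means the generated tutorial page carries the meta refresh while the Python file itself stays importable.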

beginner_source/audio_datasets_tutorial.py

Lines changed: 5 additions & 81 deletions
@@ -3,85 +3,9 @@
 Audio Datasets
 ==============
 
-``torchaudio`` provides easy access to common, publicly accessible
-datasets. Please refer to the official documentation for the list of
-available datasets.
-"""
-
-# When running this tutorial in Google Colab, install the required packages
-# with the following.
-# !pip install torchaudio
-
-import torch
-import torchaudio
-
-print(torch.__version__)
-print(torchaudio.__version__)
-
-######################################################################
-# Preparing data and utility functions (skip this section)
-# --------------------------------------------------------
-#
-
-# @title Prepare data and utility functions. {display-mode: "form"}
-# @markdown
-# @markdown You do not need to look into this cell.
-# @markdown Just execute once and you are good to go.
-
-# -------------------------------------------------------------------------------
-# Preparation of data and helper functions.
-# -------------------------------------------------------------------------------
-import multiprocessing
-import os
-
-import matplotlib.pyplot as plt
-from IPython.display import Audio, display
-
-
-_SAMPLE_DIR = "_assets"
-YESNO_DATASET_PATH = os.path.join(_SAMPLE_DIR, "yes_no")
-os.makedirs(YESNO_DATASET_PATH, exist_ok=True)
-
+This tutorial has been moved to https://pytorch.org/tutorials/beginner/audio_datasets_tutorial.html
 
-def plot_specgram(waveform, sample_rate, title="Spectrogram", xlim=None):
-    waveform = waveform.numpy()
-
-    num_channels, num_frames = waveform.shape
-
-    figure, axes = plt.subplots(num_channels, 1)
-    if num_channels == 1:
-        axes = [axes]
-    for c in range(num_channels):
-        axes[c].specgram(waveform[c], Fs=sample_rate)
-        if num_channels > 1:
-            axes[c].set_ylabel(f"Channel {c+1}")
-        if xlim:
-            axes[c].set_xlim(xlim)
-    figure.suptitle(title)
-    plt.show(block=False)
-
-
-def play_audio(waveform, sample_rate):
-    waveform = waveform.numpy()
-
-    num_channels, num_frames = waveform.shape
-    if num_channels == 1:
-        display(Audio(waveform[0], rate=sample_rate))
-    elif num_channels == 2:
-        display(Audio((waveform[0], waveform[1]), rate=sample_rate))
-    else:
-        raise ValueError("Waveform with more than 2 channels are not supported.")
-
-
-######################################################################
-# Here, we show how to use the
-# :py:func:`torchaudio.datasets.YESNO` dataset.
-#
-
-
-dataset = torchaudio.datasets.YESNO(YESNO_DATASET_PATH, download=True)
-
-for i in [1, 3, 5]:
-    waveform, sample_rate, label = dataset[i]
-    plot_specgram(waveform, sample_rate, title=f"Sample {i}: {label}")
-    play_audio(waveform, sample_rate)
+It will redirect in 3 seconds.
+.. raw::html
+<meta http-equiv="Refresh" content="3; url='https://pytorch.org/tutorials/beginner/audio_datasets_tutorial.html'" />
+"""

beginner_source/audio_feature_augmentation_tutorial.py

Lines changed: 5 additions & 162 deletions
@@ -2,167 +2,10 @@
 """
 Audio Feature Augmentation
 ==========================
-"""
-
-# When running this tutorial in Google Colab, install the required packages
-# with the following.
-# !pip install torchaudio librosa
-
-import torch
-import torchaudio
-import torchaudio.transforms as T
-
-print(torch.__version__)
-print(torchaudio.__version__)
-
-######################################################################
-# Preparing data and utility functions (skip this section)
-# --------------------------------------------------------
-#
-
-# @title Prepare data and utility functions. {display-mode: "form"}
-# @markdown
-# @markdown You do not need to look into this cell.
-# @markdown Just execute once and you are good to go.
-# @markdown
-# @markdown In this tutorial, we will use a speech data from [VOiCES dataset](https://iqtlabs.github.io/voices/),
-# @markdown which is licensed under Creative Commos BY 4.0.
-
-# -------------------------------------------------------------------------------
-# Preparation of data and helper functions.
-# -------------------------------------------------------------------------------
-
-import os
-
-import librosa
-import matplotlib.pyplot as plt
-import requests
-
-
-_SAMPLE_DIR = "_assets"
-
-SAMPLE_WAV_SPEECH_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/source-16k/train/sp0307/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav"  # noqa: E501
-SAMPLE_WAV_SPEECH_PATH = os.path.join(_SAMPLE_DIR, "speech.wav")
-
-os.makedirs(_SAMPLE_DIR, exist_ok=True)
-
-
-def _fetch_data():
-    uri = [
-        (SAMPLE_WAV_SPEECH_URL, SAMPLE_WAV_SPEECH_PATH),
-    ]
-    for url, path in uri:
-        with open(path, "wb") as file_:
-            file_.write(requests.get(url).content)
-
-
-_fetch_data()
-
-
-def _get_sample(path, resample=None):
-    effects = [["remix", "1"]]
-    if resample:
-        effects.extend(
-            [
-                ["lowpass", f"{resample // 2}"],
-                ["rate", f"{resample}"],
-            ]
-        )
-    return torchaudio.sox_effects.apply_effects_file(path, effects=effects)
-
-
-def get_speech_sample(*, resample=None):
-    return _get_sample(SAMPLE_WAV_SPEECH_PATH, resample=resample)
-
 
-def get_spectrogram(
-    n_fft=400,
-    win_len=None,
-    hop_len=None,
-    power=2.0,
-):
-    waveform, _ = get_speech_sample()
-    spectrogram = T.Spectrogram(
-        n_fft=n_fft,
-        win_length=win_len,
-        hop_length=hop_len,
-        center=True,
-        pad_mode="reflect",
-        power=power,
-    )
-    return spectrogram(waveform)
+This tutorial has been moved to https://pytorch.org/audio/stable/tutorials/audio_data_augmentation_tutorial.html
 
-
-def plot_spectrogram(spec, title=None, ylabel="freq_bin", aspect="auto", xmax=None):
-    fig, axs = plt.subplots(1, 1)
-    axs.set_title(title or "Spectrogram (db)")
-    axs.set_ylabel(ylabel)
-    axs.set_xlabel("frame")
-    im = axs.imshow(librosa.power_to_db(spec), origin="lower", aspect=aspect)
-    if xmax:
-        axs.set_xlim((0, xmax))
-    fig.colorbar(im, ax=axs)
-    plt.show(block=False)
-
-
-######################################################################
-# SpecAugment
-# -----------
-#
-# `SpecAugment <https://ai.googleblog.com/2019/04/specaugment-new-data-augmentation.html>`__
-# is a popular spectrogram augmentation technique.
-#
-# ``torchaudio`` implements :py:func:`torchaudio.transforms.TimeStretch`,
-# :py:func:`torchaudio.transforms.TimeMasking` and
-# :py:func:`torchaudio.transforms.FrequencyMasking`.
-#
-
-######################################################################
-# TimeStretch
-# -----------
-#
-
-
-spec = get_spectrogram(power=None)
-stretch = T.TimeStretch()
-
-rate = 1.2
-spec_ = stretch(spec, rate)
-plot_spectrogram(torch.abs(spec_[0]), title=f"Stretched x{rate}", aspect="equal", xmax=304)
-
-plot_spectrogram(torch.abs(spec[0]), title="Original", aspect="equal", xmax=304)
-
-rate = 0.9
-spec_ = stretch(spec, rate)
-plot_spectrogram(torch.abs(spec_[0]), title=f"Stretched x{rate}", aspect="equal", xmax=304)
-
-######################################################################
-# TimeMasking
-# -----------
-#
-
-torch.random.manual_seed(4)
-
-spec = get_spectrogram()
-plot_spectrogram(spec[0], title="Original")
-
-masking = T.TimeMasking(time_mask_param=80)
-spec = masking(spec)
-
-plot_spectrogram(spec[0], title="Masked along time axis")
-
-######################################################################
-# FrequencyMasking
-# ----------------
-#
-
-
-torch.random.manual_seed(4)
-
-spec = get_spectrogram()
-plot_spectrogram(spec[0], title="Original")
-
-masking = T.FrequencyMasking(freq_mask_param=80)
-spec = masking(spec)
-
-plot_spectrogram(spec[0], title="Masked along frequency axis")
+It will redirect in 3 seconds.
+.. raw::html
+<meta http-equiv="Refresh" content="3; url='https://pytorch.org/audio/stable/tutorials/audio_data_augmentation_tutorial.html'" />
+"""

0 commit comments