Skip to content

Commit f1fd16f

Browse files
vincentqbbrianjoholly1238
authored
Fix name error in audio tutorial (#1223)
* Rename speech_command_recognition_with_torchaudio.py to speech_command_recognition_with_torchaudio_tutorial.py Updated to run the tutorial at build time. * Update index.rst * record in colab, and also outside. move pydub to first install command. * multiline on one line. * remove noncolab support. * text tqdm. Co-authored-by: Brian Johnson <brianjo@fb.com> Co-authored-by: holly1238 <77758406+holly1238@users.noreply.github.com>
1 parent 760455d commit f1fd16f

File tree

2 files changed

+44
-43
lines changed

2 files changed

+44
-43
lines changed

index.rst

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ Welcome to PyTorch Tutorials
132132
:header: Speech Command Recognition
133133
:card_description: Learn how to correctly format an audio dataset and then train/test an audio classifier network on the dataset.
134134
:image: _static/img/thumbnails/cropped/torchaudio-speech.png
135-
:link: intermediate/speech_command_recognition_with_torchaudio.html
135+
:link: intermediate/speech_command_recognition_with_torchaudio_tutorial.html
136136
:tags: Audio
137137

138138
.. Text
@@ -578,8 +578,7 @@ Additional Resources
578578
:caption: Audio
579579

580580
beginner/audio_preprocessing_tutorial
581-
intermediate/speech_command_recognition_with_torchaudio
582-
581+
intermediate/speech_command_recognition_with_torchaudio_tutorial
583582

584583
.. toctree::
585584
:maxdepth: 2

intermediate_source/speech_command_recognition_with_torchaudio.py renamed to intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py

Lines changed: 42 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,25 @@
1616
1717
"""
1818

19-
# Uncomment the following line to run in Google Colab
19+
# Uncomment the line corresponding to your "runtime type" to run in Google Colab
2020

2121
# CPU:
22-
# !pip install torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
22+
# !pip install pydub torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
2323

2424
# GPU:
25-
# !pip install torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
26-
27-
# For interactive demo at the end:
28-
# !pip install pydub
25+
# !pip install pydub torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
2926

3027
import torch
3128
import torch.nn as nn
3229
import torch.nn.functional as F
3330
import torch.optim as optim
3431
import torchaudio
32+
import sys
3533

3634
import matplotlib.pyplot as plt
3735
import IPython.display as ipd
38-
from tqdm.notebook import tqdm
36+
37+
from tqdm import tqdm
3938

4039

4140
######################################################################
@@ -482,39 +481,40 @@ def predict(tensor):
482481
# will record one second of audio and try to classify it.
483482
#
484483

485-
from google.colab import output as colab_output
486-
from base64 import b64decode
487-
from io import BytesIO
488-
from pydub import AudioSegment
489-
490-
491-
RECORD = """
492-
const sleep = time => new Promise(resolve => setTimeout(resolve, time))
493-
const b2text = blob => new Promise(resolve => {
494-
const reader = new FileReader()
495-
reader.onloadend = e => resolve(e.srcElement.result)
496-
reader.readAsDataURL(blob)
497-
})
498-
var record = time => new Promise(async resolve => {
499-
stream = await navigator.mediaDevices.getUserMedia({ audio: true })
500-
recorder = new MediaRecorder(stream)
501-
chunks = []
502-
recorder.ondataavailable = e => chunks.push(e.data)
503-
recorder.start()
504-
await sleep(time)
505-
recorder.onstop = async ()=>{
506-
blob = new Blob(chunks)
507-
text = await b2text(blob)
508-
resolve(text)
509-
}
510-
recorder.stop()
511-
})
512-
"""
513-
514484

515485
def record(seconds=1):
516-
display(ipd.Javascript(RECORD))
486+
487+
from google.colab import output as colab_output
488+
from base64 import b64decode
489+
from io import BytesIO
490+
from pydub import AudioSegment
491+
492+
RECORD = (
493+
b"const sleep = time => new Promise(resolve => setTimeout(resolve, time))\n"
494+
b"const b2text = blob => new Promise(resolve => {\n"
495+
b" const reader = new FileReader()\n"
496+
b" reader.onloadend = e => resolve(e.srcElement.result)\n"
497+
b" reader.readAsDataURL(blob)\n"
498+
b"})\n"
499+
b"var record = time => new Promise(async resolve => {\n"
500+
b" stream = await navigator.mediaDevices.getUserMedia({ audio: true })\n"
501+
b" recorder = new MediaRecorder(stream)\n"
502+
b" chunks = []\n"
503+
b" recorder.ondataavailable = e => chunks.push(e.data)\n"
504+
b" recorder.start()\n"
505+
b" await sleep(time)\n"
506+
b" recorder.onstop = async ()=>{\n"
507+
b" blob = new Blob(chunks)\n"
508+
b" text = await b2text(blob)\n"
509+
b" resolve(text)\n"
510+
b" }\n"
511+
b" recorder.stop()\n"
512+
b"})"
513+
)
514+
RECORD = RECORD.decode("ascii")
515+
517516
print(f"Recording started for {seconds} seconds.")
517+
display(ipd.Javascript(RECORD))
518518
s = colab_output.eval_js("record(%d)" % (seconds * 1000))
519519
print("Recording ended.")
520520
b = b64decode(s.split(",")[1])
@@ -525,9 +525,11 @@ def record(seconds=1):
525525
return torchaudio.load(filename)
526526

527527

528-
waveform, sample_rate = record()
529-
print(f"Predicted: {predict(waveform)}.")
530-
ipd.Audio(waveform.numpy(), rate=sample_rate)
528+
# Detect whether the notebook is running in Google Colab
529+
if "google.colab" in sys.modules:
530+
waveform, sample_rate = record()
531+
print(f"Predicted: {predict(waveform)}.")
532+
ipd.Audio(waveform.numpy(), rate=sample_rate)
531533

532534

533535
######################################################################

0 commit comments

Comments
 (0)