From a3b97f846b1a2392f3fcc6603ae17157f990677e Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Fri, 6 Nov 2020 18:10:58 -0500 Subject: [PATCH 1/7] Rename speech_command_recognition_with_torchaudio.py to speech_command_recognition_with_torchaudio_tutorial.py Updated to run the tutorial at build time. --- ....py => speech_command_recognition_with_torchaudio_tutorial.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename intermediate_source/{speech_command_recognition_with_torchaudio.py => speech_command_recognition_with_torchaudio_tutorial.py} (100%) diff --git a/intermediate_source/speech_command_recognition_with_torchaudio.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py similarity index 100% rename from intermediate_source/speech_command_recognition_with_torchaudio.py rename to intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py From 547ee7b05b741665b8a8c737db4e2da5151e9beb Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Fri, 6 Nov 2020 18:12:48 -0500 Subject: [PATCH 2/7] Update index.rst --- index.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/index.rst b/index.rst index 231134d9a7a..7d40b343616 100644 --- a/index.rst +++ b/index.rst @@ -121,7 +121,7 @@ Welcome to PyTorch Tutorials :header: Speech Command Recognition :card_description: Learn how to correctly format an audio dataset and then train/test an audio classifier network on the dataset. :image: _static/img/thumbnails/cropped/torchaudio-speech.png - :link: intermediate/speech_command_recognition_with_torchaudio.html + :link: intermediate/speech_command_recognition_with_torchaudio_tutorial.html :tags: Audio .. Text @@ -467,8 +467,7 @@ Additional Resources :caption: Audio beginner/audio_preprocessing_tutorial - intermediate/speech_command_recognition_with_torchaudio - + intermediate/speech_command_recognition_with_torchaudio_tutorial .. toctree:: :maxdepth: 2 From b93e277dc5e2864eefce5501de1f4205e07a6230 Mon Sep 17 00:00:00 2001 From: Vincent Quenneville-Belair Date: Fri, 6 Nov 2020 21:18:26 -0500 Subject: [PATCH 3/7] record in colab, and also outside. move pydub to first install command. --- ...nd_recognition_with_torchaudio_tutorial.py | 49 ++++++++++++++----- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py index 519e714abe3..1574a55ed76 100644 --- a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py +++ b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py @@ -16,22 +16,20 @@ """ -# Uncomment the following line to run in Google Colab +# Uncomment the line corresponding to your "runtime type" to run in Google Colab # CPU: -# !pip install torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html +# !pip install pydub torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html # GPU: -# !pip install torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html - -# For interactive demo at the end: -# !pip install pydub +# !pip install pydub torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import torchaudio +import sys import matplotlib.pyplot as plt import IPython.display as ipd @@ -482,11 +480,6 @@ def predict(tensor): # will record one second of audio and try to classify it. # -from google.colab import output as colab_output -from base64 import b64decode -from io import BytesIO -from pydub import AudioSegment - RECORD = """ const sleep = time => new Promise(resolve => setTimeout(resolve, time)) @@ -512,7 +505,13 @@ def predict(tensor): """ -def record(seconds=1): +def record_colab(seconds=1): + + from google.colab import output as colab_output + from base64 import b64decode + from io import BytesIO + from pydub import AudioSegment + display(ipd.Javascript(RECORD)) print(f"Recording started for {seconds} seconds.") s = colab_output.eval_js("record(%d)" % (seconds * 1000)) @@ -525,6 +524,32 @@ def record(seconds=1): return torchaudio.load(filename) +def record_noncolab(seconds=1): + + import sounddevice + import scipy.io.wavfile + + sample_rate = 44100 + + print(f"Recording started for {seconds} seconds.") + myrecording = sounddevice.rec( + int(seconds * sample_rate), samplerate=sample_rate, channels=1 + ) + sounddevice.wait() + print("Recording ended.") + + filename = "_audio.wav" + scipy.io.wavfile.write(filename, sample_rate, myrecording) + return torchaudio.load(filename) + + +# Detect whether notebook runs in google colab +if "google.colab" in sys.modules: + record = record_colab +else: + record = record_noncolab + + waveform, sample_rate = record() print(f"Predicted: {predict(waveform)}.") ipd.Audio(waveform.numpy(), rate=sample_rate) From 7f098ab7611cae3c21101d74821bc43b29a9b34b Mon Sep 17 00:00:00 2001 From: Vincent Quenneville-Belair Date: Mon, 9 Nov 2020 11:56:05 -0500 Subject: [PATCH 4/7] multiline on one line. --- ...nd_recognition_with_torchaudio_tutorial.py | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py index 1574a55ed76..cdd1450b3e4 100644 --- a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py +++ b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py @@ -481,30 +481,6 @@ def predict(tensor): # -RECORD = """ -const sleep = time => new Promise(resolve => setTimeout(resolve, time)) -const b2text = blob => new Promise(resolve => { - const reader = new FileReader() - reader.onloadend = e => resolve(e.srcElement.result) - reader.readAsDataURL(blob) -}) -var record = time => new Promise(async resolve => { - stream = await navigator.mediaDevices.getUserMedia({ audio: true }) - recorder = new MediaRecorder(stream) - chunks = [] - recorder.ondataavailable = e => chunks.push(e.data) - recorder.start() - await sleep(time) - recorder.onstop = async ()=>{ - blob = new Blob(chunks) - text = await b2text(blob) - resolve(text) - } - recorder.stop() -}) -""" - - def record_colab(seconds=1): from google.colab import output as colab_output @@ -512,8 +488,32 @@ def record_colab(seconds=1): from io import BytesIO from pydub import AudioSegment - display(ipd.Javascript(RECORD)) + RECORD = ( + b"const sleep = time => new Promise(resolve => setTimeout(resolve, time))\n" + b"const b2text = blob => new Promise(resolve => {\n" + b" const reader = new FileReader()\n" + b" reader.onloadend = e => resolve(e.srcElement.result)\n" + b" reader.readAsDataURL(blob)\n" + b"})\n" + b"var record = time => new Promise(async resolve => {\n" + b" stream = await navigator.mediaDevices.getUserMedia({ audio: true })\n" + b" recorder = new MediaRecorder(stream)\n" + b" chunks = []\n" + b" recorder.ondataavailable = e => chunks.push(e.data)\n" + b" recorder.start()\n" + b" await sleep(time)\n" + b" recorder.onstop = async ()=>{\n" + b" blob = new Blob(chunks)\n" + b" text = await b2text(blob)\n" + b" resolve(text)\n" + b" }\n" + b" recorder.stop()\n" + b"})" + ) + RECORD = RECORD.decode("ascii") + print(f"Recording started for {seconds} seconds.") + display(ipd.Javascript(RECORD)) s = colab_output.eval_js("record(%d)" % (seconds * 1000)) print("Recording ended.") b = b64decode(s.split(",")[1]) From 4f30a8a6a21bf3cf844c49b91893103484e2f121 Mon Sep 17 00:00:00 2001 From: Vincent Quenneville-Belair Date: Tue, 17 Nov 2020 16:07:39 -0500 Subject: [PATCH 5/7] remove noncolab support. --- ...nd_recognition_with_torchaudio_tutorial.py | 32 +++---------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py index cdd1450b3e4..75914c2205a 100644 --- a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py +++ b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py @@ -481,7 +481,7 @@ def predict(tensor): # -def record_colab(seconds=1): +def record(seconds=1): from google.colab import output as colab_output from base64 import b64decode @@ -524,35 +524,11 @@ def record_colab(seconds=1): return torchaudio.load(filename) -def record_noncolab(seconds=1): - - import sounddevice - import scipy.io.wavfile - - sample_rate = 44100 - - print(f"Recording started for {seconds} seconds.") - myrecording = sounddevice.rec( - int(seconds * sample_rate), samplerate=sample_rate, channels=1 - ) - sounddevice.wait() - print("Recording ended.") - - filename = "_audio.wav" - scipy.io.wavfile.write(filename, sample_rate, myrecording) - return torchaudio.load(filename) - - # Detect whether notebook runs in google colab if "google.colab" in sys.modules: - record = record_colab -else: - record = record_noncolab - - -waveform, sample_rate = record() -print(f"Predicted: {predict(waveform)}.") -ipd.Audio(waveform.numpy(), rate=sample_rate) + waveform, sample_rate = record() + print(f"Predicted: {predict(waveform)}.") + ipd.Audio(waveform.numpy(), rate=sample_rate) ###################################################################### From f5b9169f9643ecc1c58540fd52396db6204bf65e Mon Sep 17 00:00:00 2001 From: Vincent Quenneville-Belair Date: Wed, 18 Nov 2020 15:19:48 -0500 Subject: [PATCH 6/7] text tqdm. --- .../speech_command_recognition_with_torchaudio_tutorial.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py index 75914c2205a..0627949aecf 100644 --- a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py +++ b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py @@ -33,7 +33,8 @@ import matplotlib.pyplot as plt import IPython.display as ipd -from tqdm.notebook import tqdm + +from tqdm import tqdm ###################################################################### From 0ed65270644b4ccf6061eac1fdbb4964c304acb8 Mon Sep 17 00:00:00 2001 From: Vincent Quenneville-Belair Date: Thu, 19 Nov 2020 15:19:12 -0500 Subject: [PATCH 7/7] Revert "remove noncolab support." This reverts commit 4f30a8a6a21bf3cf844c49b91893103484e2f121. --- ...nd_recognition_with_torchaudio_tutorial.py | 32 ++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py index 0627949aecf..506ba06f571 100644 --- a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py +++ b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py @@ -482,7 +482,7 @@ def predict(tensor): # -def record(seconds=1): +def record_colab(seconds=1): from google.colab import output as colab_output from base64 import b64decode @@ -525,11 +525,35 @@ def record(seconds=1): return torchaudio.load(filename) +def record_noncolab(seconds=1): + + import sounddevice + import scipy.io.wavfile + + sample_rate = 44100 + + print(f"Recording started for {seconds} seconds.") + myrecording = sounddevice.rec( + int(seconds * sample_rate), samplerate=sample_rate, channels=1 + ) + sounddevice.wait() + print("Recording ended.") + + filename = "_audio.wav" + scipy.io.wavfile.write(filename, sample_rate, myrecording) + return torchaudio.load(filename) + + # Detect whether notebook runs in google colab if "google.colab" in sys.modules: - waveform, sample_rate = record() - print(f"Predicted: {predict(waveform)}.") - ipd.Audio(waveform.numpy(), rate=sample_rate) + record = record_colab +else: + record = record_noncolab + + +waveform, sample_rate = record() +print(f"Predicted: {predict(waveform)}.") +ipd.Audio(waveform.numpy(), rate=sample_rate) ######################################################################