From a3b97f846b1a2392f3fcc6603ae17157f990677e Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Fri, 6 Nov 2020 18:10:58 -0500 Subject: [PATCH 1/5] Rename speech_command_recognition_with_torchaudio.py to speech_command_recognition_with_torchaudio_tutorial.py Updated to run the tutorial at build time. --- ....py => speech_command_recognition_with_torchaudio_tutorial.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename intermediate_source/{speech_command_recognition_with_torchaudio.py => speech_command_recognition_with_torchaudio_tutorial.py} (100%) diff --git a/intermediate_source/speech_command_recognition_with_torchaudio.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py similarity index 100% rename from intermediate_source/speech_command_recognition_with_torchaudio.py rename to intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py From 547ee7b05b741665b8a8c737db4e2da5151e9beb Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Fri, 6 Nov 2020 18:12:48 -0500 Subject: [PATCH 2/5] Update index.rst --- index.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/index.rst b/index.rst index 231134d9a7a..7d40b343616 100644 --- a/index.rst +++ b/index.rst @@ -121,7 +121,7 @@ Welcome to PyTorch Tutorials :header: Speech Command Recognition :card_description: Learn how to correctly format an audio dataset and then train/test an audio classifier network on the dataset. :image: _static/img/thumbnails/cropped/torchaudio-speech.png - :link: intermediate/speech_command_recognition_with_torchaudio.html + :link: intermediate/speech_command_recognition_with_torchaudio_tutorial.html :tags: Audio .. Text @@ -467,8 +467,7 @@ Additional Resources :caption: Audio beginner/audio_preprocessing_tutorial - intermediate/speech_command_recognition_with_torchaudio - + intermediate/speech_command_recognition_with_torchaudio_tutorial .. toctree:: :maxdepth: 2 From 42442e377695c38530a9b7c5b0fe85c9d4fe5ac7 Mon Sep 17 00:00:00 2001 From: Vincent Quenneville-Belair Date: Fri, 6 Nov 2020 21:18:26 -0500 Subject: [PATCH 3/5] record in colab, and also outside. move pydub to first install command. --- ...nd_recognition_with_torchaudio_tutorial.py | 47 ++++++++++++++----- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py index 519e714abe3..84377565f84 100644 --- a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py +++ b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py @@ -16,22 +16,20 @@ """ -# Uncomment the following line to run in Google Colab +# Uncomment the line corresponding to your "runtime type" to run in Google Colab # CPU: -# !pip install torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html +# !pip install pydub torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html # GPU: -# !pip install torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html - -# For interactive demo at the end: -# !pip install pydub +# !pip install pydub torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import torchaudio +import sys import matplotlib.pyplot as plt import IPython.display as ipd @@ -482,11 +480,6 @@ def predict(tensor): # will record one second of audio and try to classify it. # -from google.colab import output as colab_output -from base64 import b64decode -from io import BytesIO -from pydub import AudioSegment - RECORD = """ const sleep = time => new Promise(resolve => setTimeout(resolve, time)) @@ -513,6 +506,12 @@ def predict(tensor): def record(seconds=1): + + from google.colab import output as colab_output + from base64 import b64decode + from io import BytesIO + from pydub import AudioSegment + display(ipd.Javascript(RECORD)) print(f"Recording started for {seconds} seconds.") s = colab_output.eval_js("record(%d)" % (seconds * 1000)) @@ -525,6 +524,32 @@ def record(seconds=1): return torchaudio.load(filename) +def record_noncolab(seconds=1): + + import sounddevice + import scipy.io.wavfile + + sample_rate = 44100 + + print(f"Recording started for {seconds} seconds.") + myrecording = sounddevice.rec( + int(seconds * sample_rate), samplerate=sample_rate, channels=1 + ) + sounddevice.wait() + print("Recording ended.") + + filename = "_audio.wav" + scipy.io.wavfile.write(filename, sample_rate, myrecording) + return torchaudio.load(filename) + + +# Detect whether notebook runs in google colab +if "google.colab" in sys.modules: + record = record_colab +else: + record = record_noncolab + + waveform, sample_rate = record() print(f"Predicted: {predict(waveform)}.") ipd.Audio(waveform.numpy(), rate=sample_rate) From 13452b86336b9683024bb2776ed1d458db2d0e5f Mon Sep 17 00:00:00 2001 From: Vincent Quenneville-Belair Date: Mon, 9 Nov 2020 11:56:05 -0500 Subject: [PATCH 4/5] multiline on one line. --- ...nd_recognition_with_torchaudio_tutorial.py | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py index 84377565f84..9ec3eeca42f 100644 --- a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py +++ b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py @@ -481,30 +481,6 @@ def predict(tensor): # -RECORD = """ -const sleep = time => new Promise(resolve => setTimeout(resolve, time)) -const b2text = blob => new Promise(resolve => { - const reader = new FileReader() - reader.onloadend = e => resolve(e.srcElement.result) - reader.readAsDataURL(blob) -}) -var record = time => new Promise(async resolve => { - stream = await navigator.mediaDevices.getUserMedia({ audio: true }) - recorder = new MediaRecorder(stream) - chunks = [] - recorder.ondataavailable = e => chunks.push(e.data) - recorder.start() - await sleep(time) - recorder.onstop = async ()=>{ - blob = new Blob(chunks) - text = await b2text(blob) - resolve(text) - } - recorder.stop() -}) -""" - - def record(seconds=1): from google.colab import output as colab_output @@ -512,8 +488,32 @@ def record(seconds=1): from io import BytesIO from pydub import AudioSegment - display(ipd.Javascript(RECORD)) + RECORD = ( + b"const sleep = time => new Promise(resolve => setTimeout(resolve, time))\n" + b"const b2text = blob => new Promise(resolve => {\n" + b" const reader = new FileReader()\n" + b" reader.onloadend = e => resolve(e.srcElement.result)\n" + b" reader.readAsDataURL(blob)\n" + b"})\n" + b"var record = time => new Promise(async resolve => {\n" + b" stream = await navigator.mediaDevices.getUserMedia({ audio: true })\n" + b" recorder = new MediaRecorder(stream)\n" + b" chunks = []\n" + b" recorder.ondataavailable = e => chunks.push(e.data)\n" + b" recorder.start()\n" + b" await sleep(time)\n" + b" recorder.onstop = async ()=>{\n" + b" blob = new Blob(chunks)\n" + b" text = await b2text(blob)\n" + b" resolve(text)\n" + b" }\n" + b" recorder.stop()\n" + b"})" + ) + RECORD = RECORD.decode("ascii") + print(f"Recording started for {seconds} seconds.") + display(ipd.Javascript(RECORD)) s = colab_output.eval_js("record(%d)" % (seconds * 1000)) print("Recording ended.") b = b64decode(s.split(",")[1]) From 301b97fdd0756d9f80260a2a5c12bce257d625f5 Mon Sep 17 00:00:00 2001 From: Vincent Quenneville-Belair Date: Tue, 17 Nov 2020 16:07:39 -0500 Subject: [PATCH 5/5] remove noncolab support. --- ...nd_recognition_with_torchaudio_tutorial.py | 30 ++----------------- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py index 9ec3eeca42f..75914c2205a 100644 --- a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py +++ b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py @@ -524,35 +524,11 @@ def record(seconds=1): return torchaudio.load(filename) -def record_noncolab(seconds=1): - - import sounddevice - import scipy.io.wavfile - - sample_rate = 44100 - - print(f"Recording started for {seconds} seconds.") - myrecording = sounddevice.rec( - int(seconds * sample_rate), samplerate=sample_rate, channels=1 - ) - sounddevice.wait() - print("Recording ended.") - - filename = "_audio.wav" - scipy.io.wavfile.write(filename, sample_rate, myrecording) - return torchaudio.load(filename) - - # Detect whether notebook runs in google colab if "google.colab" in sys.modules: - record = record_colab -else: - record = record_noncolab - - -waveform, sample_rate = record() -print(f"Predicted: {predict(waveform)}.") -ipd.Audio(waveform.numpy(), rate=sample_rate) + waveform, sample_rate = record() + print(f"Predicted: {predict(waveform)}.") + ipd.Audio(waveform.numpy(), rate=sample_rate) ######################################################################