From a3b97f846b1a2392f3fcc6603ae17157f990677e Mon Sep 17 00:00:00 2001
From: Brian Johnson <brianjo@fb.com>
Date: Fri, 6 Nov 2020 18:10:58 -0500
Subject: [PATCH 1/5] Rename speech_command_recognition_with_torchaudio.py to
 speech_command_recognition_with_torchaudio_tutorial.py

Updated to run the tutorial at build time.
---
 ....py => speech_command_recognition_with_torchaudio_tutorial.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename intermediate_source/{speech_command_recognition_with_torchaudio.py => speech_command_recognition_with_torchaudio_tutorial.py} (100%)

diff --git a/intermediate_source/speech_command_recognition_with_torchaudio.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
similarity index 100%
rename from intermediate_source/speech_command_recognition_with_torchaudio.py
rename to intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py

From 547ee7b05b741665b8a8c737db4e2da5151e9beb Mon Sep 17 00:00:00 2001
From: Brian Johnson <brianjo@fb.com>
Date: Fri, 6 Nov 2020 18:12:48 -0500
Subject: [PATCH 2/5] Update index.rst links for the renamed speech tutorial

---
 index.rst | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/index.rst b/index.rst
index 231134d9a7a..7d40b343616 100644
--- a/index.rst
+++ b/index.rst
@@ -121,7 +121,7 @@ Welcome to PyTorch Tutorials
    :header: Speech Command Recognition
    :card_description: Learn how to correctly format an audio dataset and then train/test an audio classifier network on the dataset.
    :image: _static/img/thumbnails/cropped/torchaudio-speech.png
-   :link: intermediate/speech_command_recognition_with_torchaudio.html
+   :link: intermediate/speech_command_recognition_with_torchaudio_tutorial.html
    :tags: Audio
 
 .. Text
@@ -467,8 +467,7 @@ Additional Resources
    :caption: Audio
 
    beginner/audio_preprocessing_tutorial
-   intermediate/speech_command_recognition_with_torchaudio
-   
+   intermediate/speech_command_recognition_with_torchaudio_tutorial
 
 .. toctree::
    :maxdepth: 2

From 42442e377695c38530a9b7c5b0fe85c9d4fe5ac7 Mon Sep 17 00:00:00 2001
From: Vincent Quenneville-Belair <vincentqb@gmail.com>
Date: Fri, 6 Nov 2020 21:18:26 -0500
Subject: [PATCH 3/5] Record in Colab and also outside it; move pydub to the
 first install command.

---
 ...nd_recognition_with_torchaudio_tutorial.py | 47 ++++++++++++++-----
 1 file changed, 36 insertions(+), 11 deletions(-)

diff --git a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
index 519e714abe3..84377565f84 100644
--- a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
+++ b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
@@ -16,22 +16,20 @@
 
 """
 
-# Uncomment the following line to run in Google Colab
+# Uncomment the line corresponding to your "runtime type" to run in Google Colab
 
 # CPU:
-# !pip install torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
+# !pip install pydub torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
 
 # GPU:
-# !pip install torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
-
-# For interactive demo at the end:
-# !pip install pydub
+# !pip install pydub torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
 
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
 import torchaudio
+import sys
 
 import matplotlib.pyplot as plt
 import IPython.display as ipd
@@ -482,11 +480,6 @@ def predict(tensor):
 # will record one second of audio and try to classify it.
 #
 
-from google.colab import output as colab_output
-from base64 import b64decode
-from io import BytesIO
-from pydub import AudioSegment
-
 
 RECORD = """
 const sleep  = time => new Promise(resolve => setTimeout(resolve, time))
@@ -513,6 +506,12 @@ def predict(tensor):
 
 
 def record(seconds=1):
+
+    from google.colab import output as colab_output
+    from base64 import b64decode
+    from io import BytesIO
+    from pydub import AudioSegment
+
     display(ipd.Javascript(RECORD))
     print(f"Recording started for {seconds} seconds.")
     s = colab_output.eval_js("record(%d)" % (seconds * 1000))
@@ -525,6 +524,32 @@ def record(seconds=1):
     return torchaudio.load(filename)
 
 
+def record_noncolab(seconds=1):
+
+    import sounddevice
+    import scipy.io.wavfile
+
+    sample_rate = 44100
+
+    print(f"Recording started for {seconds} seconds.")
+    myrecording = sounddevice.rec(
+        int(seconds * sample_rate), samplerate=sample_rate, channels=1
+    )
+    sounddevice.wait()
+    print("Recording ended.")
+
+    filename = "_audio.wav"
+    scipy.io.wavfile.write(filename, sample_rate, myrecording)
+    return torchaudio.load(filename)
+
+
+# Detect whether notebook runs in google colab
+if "google.colab" in sys.modules:
+    record = record_colab
+else:
+    record = record_noncolab
+
+
 waveform, sample_rate = record()
 print(f"Predicted: {predict(waveform)}.")
 ipd.Audio(waveform.numpy(), rate=sample_rate)

From 13452b86336b9683024bb2776ed1d458db2d0e5f Mon Sep 17 00:00:00 2001
From: Vincent Quenneville-Belair <vincentqb@gmail.com>
Date: Mon, 9 Nov 2020 11:56:05 -0500
Subject: [PATCH 4/5] Replace multiline RECORD string with per-line literals.

---
 ...nd_recognition_with_torchaudio_tutorial.py | 50 +++++++++----------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
index 84377565f84..9ec3eeca42f 100644
--- a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
+++ b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
@@ -481,30 +481,6 @@ def predict(tensor):
 #
 
 
-RECORD = """
-const sleep  = time => new Promise(resolve => setTimeout(resolve, time))
-const b2text = blob => new Promise(resolve => {
-  const reader = new FileReader()
-  reader.onloadend = e => resolve(e.srcElement.result)
-  reader.readAsDataURL(blob)
-})
-var record = time => new Promise(async resolve => {
-  stream = await navigator.mediaDevices.getUserMedia({ audio: true })
-  recorder = new MediaRecorder(stream)
-  chunks = []
-  recorder.ondataavailable = e => chunks.push(e.data)
-  recorder.start()
-  await sleep(time)
-  recorder.onstop = async ()=>{
-    blob = new Blob(chunks)
-    text = await b2text(blob)
-    resolve(text)
-  }
-  recorder.stop()
-})
-"""
-
-
 def record(seconds=1):
 
     from google.colab import output as colab_output
@@ -512,8 +488,32 @@ def record(seconds=1):
     from io import BytesIO
     from pydub import AudioSegment
 
-    display(ipd.Javascript(RECORD))
+    RECORD = (
+        b"const sleep  = time => new Promise(resolve => setTimeout(resolve, time))\n"
+        b"const b2text = blob => new Promise(resolve => {\n"
+        b"  const reader = new FileReader()\n"
+        b"  reader.onloadend = e => resolve(e.srcElement.result)\n"
+        b"  reader.readAsDataURL(blob)\n"
+        b"})\n"
+        b"var record = time => new Promise(async resolve => {\n"
+        b"  stream = await navigator.mediaDevices.getUserMedia({ audio: true })\n"
+        b"  recorder = new MediaRecorder(stream)\n"
+        b"  chunks = []\n"
+        b"  recorder.ondataavailable = e => chunks.push(e.data)\n"
+        b"  recorder.start()\n"
+        b"  await sleep(time)\n"
+        b"  recorder.onstop = async ()=>{\n"
+        b"    blob = new Blob(chunks)\n"
+        b"    text = await b2text(blob)\n"
+        b"    resolve(text)\n"
+        b"  }\n"
+        b"  recorder.stop()\n"
+        b"})"
+    )
+    RECORD = RECORD.decode("ascii")
+
     print(f"Recording started for {seconds} seconds.")
+    display(ipd.Javascript(RECORD))
     s = colab_output.eval_js("record(%d)" % (seconds * 1000))
     print("Recording ended.")
     b = b64decode(s.split(",")[1])

From 301b97fdd0756d9f80260a2a5c12bce257d625f5 Mon Sep 17 00:00:00 2001
From: Vincent Quenneville-Belair <vincentqb@gmail.com>
Date: Tue, 17 Nov 2020 16:07:39 -0500
Subject: [PATCH 5/5] Remove non-Colab recording support.

---
 ...nd_recognition_with_torchaudio_tutorial.py | 30 ++-----------------
 1 file changed, 3 insertions(+), 27 deletions(-)

diff --git a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
index 9ec3eeca42f..75914c2205a 100644
--- a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
+++ b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
@@ -524,35 +524,11 @@ def record(seconds=1):
     return torchaudio.load(filename)
 
 
-def record_noncolab(seconds=1):
-
-    import sounddevice
-    import scipy.io.wavfile
-
-    sample_rate = 44100
-
-    print(f"Recording started for {seconds} seconds.")
-    myrecording = sounddevice.rec(
-        int(seconds * sample_rate), samplerate=sample_rate, channels=1
-    )
-    sounddevice.wait()
-    print("Recording ended.")
-
-    filename = "_audio.wav"
-    scipy.io.wavfile.write(filename, sample_rate, myrecording)
-    return torchaudio.load(filename)
-
-
 # Detect whether notebook runs in google colab
 if "google.colab" in sys.modules:
-    record = record_colab
-else:
-    record = record_noncolab
-
-
-waveform, sample_rate = record()
-print(f"Predicted: {predict(waveform)}.")
-ipd.Audio(waveform.numpy(), rate=sample_rate)
+    waveform, sample_rate = record()
+    print(f"Predicted: {predict(waveform)}.")
+    ipd.display(ipd.Audio(waveform.numpy(), rate=sample_rate))  # explicit display: an expression nested in "if" is not auto-rendered
 
 
 ######################################################################