From a3b97f846b1a2392f3fcc6603ae17157f990677e Mon Sep 17 00:00:00 2001
From: Brian Johnson <brianjo@fb.com>
Date: Fri, 6 Nov 2020 18:10:58 -0500
Subject: [PATCH 1/7] Rename speech_command_recognition_with_torchaudio.py to
 speech_command_recognition_with_torchaudio_tutorial.py

Updated to run the tutorial at build time.
---
 ....py => speech_command_recognition_with_torchaudio_tutorial.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename intermediate_source/{speech_command_recognition_with_torchaudio.py => speech_command_recognition_with_torchaudio_tutorial.py} (100%)

diff --git a/intermediate_source/speech_command_recognition_with_torchaudio.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
similarity index 100%
rename from intermediate_source/speech_command_recognition_with_torchaudio.py
rename to intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py

From 547ee7b05b741665b8a8c737db4e2da5151e9beb Mon Sep 17 00:00:00 2001
From: Brian Johnson <brianjo@fb.com>
Date: Fri, 6 Nov 2020 18:12:48 -0500
Subject: [PATCH 2/7] Update index.rst

---
 index.rst | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/index.rst b/index.rst
index 231134d9a7a..7d40b343616 100644
--- a/index.rst
+++ b/index.rst
@@ -121,7 +121,7 @@ Welcome to PyTorch Tutorials
    :header: Speech Command Recognition
    :card_description: Learn how to correctly format an audio dataset and then train/test an audio classifier network on the dataset.
    :image: _static/img/thumbnails/cropped/torchaudio-speech.png
-   :link: intermediate/speech_command_recognition_with_torchaudio.html
+   :link: intermediate/speech_command_recognition_with_torchaudio_tutorial.html
    :tags: Audio
 
 .. Text
@@ -467,8 +467,7 @@ Additional Resources
    :caption: Audio
 
    beginner/audio_preprocessing_tutorial
-   intermediate/speech_command_recognition_with_torchaudio
-   
+   intermediate/speech_command_recognition_with_torchaudio_tutorial
 
 .. toctree::
    :maxdepth: 2

From b93e277dc5e2864eefce5501de1f4205e07a6230 Mon Sep 17 00:00:00 2001
From: Vincent Quenneville-Belair <vincentqb@gmail.com>
Date: Fri, 6 Nov 2020 21:18:26 -0500
Subject: [PATCH 3/7] record in colab, and also outside. move pydub to first
 install command.

---
 ...nd_recognition_with_torchaudio_tutorial.py | 49 ++++++++++++++-----
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
index 519e714abe3..1574a55ed76 100644
--- a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
+++ b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
@@ -16,22 +16,20 @@
 
 """
 
-# Uncomment the following line to run in Google Colab
+# Uncomment the line corresponding to your "runtime type" to run in Google Colab
 
 # CPU:
-# !pip install torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
+# !pip install pydub torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
 
 # GPU:
-# !pip install torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
-
-# For interactive demo at the end:
-# !pip install pydub
+# !pip install pydub torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
 
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
 import torchaudio
+import sys
 
 import matplotlib.pyplot as plt
 import IPython.display as ipd
@@ -482,11 +480,6 @@ def predict(tensor):
 # will record one second of audio and try to classify it.
 #
 
-from google.colab import output as colab_output
-from base64 import b64decode
-from io import BytesIO
-from pydub import AudioSegment
-
 
 RECORD = """
 const sleep  = time => new Promise(resolve => setTimeout(resolve, time))
@@ -512,7 +505,13 @@ def predict(tensor):
 """
 
 
-def record(seconds=1):
+def record_colab(seconds=1):
+
+    from google.colab import output as colab_output
+    from base64 import b64decode
+    from io import BytesIO
+    from pydub import AudioSegment
+
     display(ipd.Javascript(RECORD))
     print(f"Recording started for {seconds} seconds.")
     s = colab_output.eval_js("record(%d)" % (seconds * 1000))
@@ -525,6 +524,32 @@ def record(seconds=1):
     return torchaudio.load(filename)
 
 
+def record_noncolab(seconds=1):
+
+    import sounddevice
+    import scipy.io.wavfile
+
+    sample_rate = 44100
+
+    print(f"Recording started for {seconds} seconds.")
+    myrecording = sounddevice.rec(
+        int(seconds * sample_rate), samplerate=sample_rate, channels=1
+    )
+    sounddevice.wait()
+    print("Recording ended.")
+
+    filename = "_audio.wav"
+    scipy.io.wavfile.write(filename, sample_rate, myrecording)
+    return torchaudio.load(filename)
+
+
+# Detect whether notebook runs in google colab
+if "google.colab" in sys.modules:
+    record = record_colab
+else:
+    record = record_noncolab
+
+
 waveform, sample_rate = record()
 print(f"Predicted: {predict(waveform)}.")
 ipd.Audio(waveform.numpy(), rate=sample_rate)

From 7f098ab7611cae3c21101d74821bc43b29a9b34b Mon Sep 17 00:00:00 2001
From: Vincent Quenneville-Belair <vincentqb@gmail.com>
Date: Mon, 9 Nov 2020 11:56:05 -0500
Subject: [PATCH 4/7] Inline RECORD JavaScript as single-line string literals.

---
 ...nd_recognition_with_torchaudio_tutorial.py | 50 +++++++++----------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
index 1574a55ed76..cdd1450b3e4 100644
--- a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
+++ b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
@@ -481,30 +481,6 @@ def predict(tensor):
 #
 
 
-RECORD = """
-const sleep  = time => new Promise(resolve => setTimeout(resolve, time))
-const b2text = blob => new Promise(resolve => {
-  const reader = new FileReader()
-  reader.onloadend = e => resolve(e.srcElement.result)
-  reader.readAsDataURL(blob)
-})
-var record = time => new Promise(async resolve => {
-  stream = await navigator.mediaDevices.getUserMedia({ audio: true })
-  recorder = new MediaRecorder(stream)
-  chunks = []
-  recorder.ondataavailable = e => chunks.push(e.data)
-  recorder.start()
-  await sleep(time)
-  recorder.onstop = async ()=>{
-    blob = new Blob(chunks)
-    text = await b2text(blob)
-    resolve(text)
-  }
-  recorder.stop()
-})
-"""
-
-
 def record_colab(seconds=1):
 
     from google.colab import output as colab_output
@@ -512,8 +488,32 @@ def record_colab(seconds=1):
     from io import BytesIO
     from pydub import AudioSegment
 
-    display(ipd.Javascript(RECORD))
+    RECORD = (
+        b"const sleep  = time => new Promise(resolve => setTimeout(resolve, time))\n"
+        b"const b2text = blob => new Promise(resolve => {\n"
+        b"  const reader = new FileReader()\n"
+        b"  reader.onloadend = e => resolve(e.srcElement.result)\n"
+        b"  reader.readAsDataURL(blob)\n"
+        b"})\n"
+        b"var record = time => new Promise(async resolve => {\n"
+        b"  stream = await navigator.mediaDevices.getUserMedia({ audio: true })\n"
+        b"  recorder = new MediaRecorder(stream)\n"
+        b"  chunks = []\n"
+        b"  recorder.ondataavailable = e => chunks.push(e.data)\n"
+        b"  recorder.start()\n"
+        b"  await sleep(time)\n"
+        b"  recorder.onstop = async ()=>{\n"
+        b"    blob = new Blob(chunks)\n"
+        b"    text = await b2text(blob)\n"
+        b"    resolve(text)\n"
+        b"  }\n"
+        b"  recorder.stop()\n"
+        b"})"
+    )
+    RECORD = RECORD.decode("ascii")
+
     print(f"Recording started for {seconds} seconds.")
+    display(ipd.Javascript(RECORD))
     s = colab_output.eval_js("record(%d)" % (seconds * 1000))
     print("Recording ended.")
     b = b64decode(s.split(",")[1])

From 4f30a8a6a21bf3cf844c49b91893103484e2f121 Mon Sep 17 00:00:00 2001
From: Vincent Quenneville-Belair <vincentqb@gmail.com>
Date: Tue, 17 Nov 2020 16:07:39 -0500
Subject: [PATCH 5/7] remove noncolab support.

---
 ...nd_recognition_with_torchaudio_tutorial.py | 32 +++----------------
 1 file changed, 4 insertions(+), 28 deletions(-)

diff --git a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
index cdd1450b3e4..75914c2205a 100644
--- a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
+++ b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
@@ -481,7 +481,7 @@ def predict(tensor):
 #
 
 
-def record_colab(seconds=1):
+def record(seconds=1):
 
     from google.colab import output as colab_output
     from base64 import b64decode
@@ -524,35 +524,11 @@ def record_colab(seconds=1):
     return torchaudio.load(filename)
 
 
-def record_noncolab(seconds=1):
-
-    import sounddevice
-    import scipy.io.wavfile
-
-    sample_rate = 44100
-
-    print(f"Recording started for {seconds} seconds.")
-    myrecording = sounddevice.rec(
-        int(seconds * sample_rate), samplerate=sample_rate, channels=1
-    )
-    sounddevice.wait()
-    print("Recording ended.")
-
-    filename = "_audio.wav"
-    scipy.io.wavfile.write(filename, sample_rate, myrecording)
-    return torchaudio.load(filename)
-
-
 # Detect whether notebook runs in google colab
 if "google.colab" in sys.modules:
-    record = record_colab
-else:
-    record = record_noncolab
-
-
-waveform, sample_rate = record()
-print(f"Predicted: {predict(waveform)}.")
-ipd.Audio(waveform.numpy(), rate=sample_rate)
+    waveform, sample_rate = record()
+    print(f"Predicted: {predict(waveform)}.")
+    ipd.Audio(waveform.numpy(), rate=sample_rate)
 
 
 ######################################################################

From f5b9169f9643ecc1c58540fd52396db6204bf65e Mon Sep 17 00:00:00 2001
From: Vincent Quenneville-Belair <vincentqb@gmail.com>
Date: Wed, 18 Nov 2020 15:19:48 -0500
Subject: [PATCH 6/7] Use text-mode tqdm instead of tqdm.notebook.

---
 .../speech_command_recognition_with_torchaudio_tutorial.py     | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
index 75914c2205a..0627949aecf 100644
--- a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
+++ b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
@@ -33,7 +33,8 @@
 
 import matplotlib.pyplot as plt
 import IPython.display as ipd
-from tqdm.notebook import tqdm
+
+from tqdm import tqdm
 
 
 ######################################################################

From 0ed65270644b4ccf6061eac1fdbb4964c304acb8 Mon Sep 17 00:00:00 2001
From: Vincent Quenneville-Belair <vincentqb@gmail.com>
Date: Thu, 19 Nov 2020 15:19:12 -0500
Subject: [PATCH 7/7] Revert "remove noncolab support."

This reverts commit 4f30a8a6a21bf3cf844c49b91893103484e2f121.
---
 ...nd_recognition_with_torchaudio_tutorial.py | 32 ++++++++++++++++---
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
index 0627949aecf..506ba06f571 100644
--- a/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
+++ b/intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py
@@ -482,7 +482,7 @@ def predict(tensor):
 #
 
 
-def record(seconds=1):
+def record_colab(seconds=1):
 
     from google.colab import output as colab_output
     from base64 import b64decode
@@ -525,11 +525,35 @@ def record(seconds=1):
     return torchaudio.load(filename)
 
 
+def record_noncolab(seconds=1):
+
+    import sounddevice
+    import scipy.io.wavfile
+
+    sample_rate = 44100
+
+    print(f"Recording started for {seconds} seconds.")
+    myrecording = sounddevice.rec(
+        int(seconds * sample_rate), samplerate=sample_rate, channels=1
+    )
+    sounddevice.wait()
+    print("Recording ended.")
+
+    filename = "_audio.wav"
+    scipy.io.wavfile.write(filename, sample_rate, myrecording)
+    return torchaudio.load(filename)
+
+
 # Detect whether notebook runs in google colab
 if "google.colab" in sys.modules:
-    waveform, sample_rate = record()
-    print(f"Predicted: {predict(waveform)}.")
-    ipd.Audio(waveform.numpy(), rate=sample_rate)
+    record = record_colab
+else:
+    record = record_noncolab
+
+
+waveform, sample_rate = record()
+print(f"Predicted: {predict(waveform)}.")
+ipd.Audio(waveform.numpy(), rate=sample_rate)
 
 
 ######################################################################