Skip to content

Commit c14874f

Browse files
committed
record in colab, and also outside. move pydub to first install command.
1 parent 52f51f7 commit c14874f

File tree

1 file changed

+36
-11
lines changed

1 file changed

+36
-11
lines changed

intermediate_source/speech_command_recognition_with_torchaudio_tutorial.py

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,22 +16,20 @@
1616
1717
"""
1818

19-
# Uncomment the following line to run in Google Colab
19+
# Uncomment the line corresponding to your "runtime type" to run in Google Colab
2020

2121
# CPU:
22-
# !pip install torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
22+
# !pip install pydub torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
2323

2424
# GPU:
25-
# !pip install torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
26-
27-
# For interactive demo at the end:
28-
# !pip install pydub
25+
# !pip install pydub torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
2926

3027
import torch
3128
import torch.nn as nn
3229
import torch.nn.functional as F
3330
import torch.optim as optim
3431
import torchaudio
32+
import sys
3533

3634
import matplotlib.pyplot as plt
3735
import IPython.display as ipd
@@ -482,11 +480,6 @@ def predict(tensor):
482480
# will record one second of audio and try to classify it.
483481
#
484482

485-
from google.colab import output as colab_output
486-
from base64 import b64decode
487-
from io import BytesIO
488-
from pydub import AudioSegment
489-
490483

491484
RECORD = """
492485
const sleep = time => new Promise(resolve => setTimeout(resolve, time))
@@ -513,6 +506,12 @@ def predict(tensor):
513506

514507

515508
def record(seconds=1):
509+
510+
from google.colab import output as colab_output
511+
from base64 import b64decode
512+
from io import BytesIO
513+
from pydub import AudioSegment
514+
516515
display(ipd.Javascript(RECORD))
517516
print(f"Recording started for {seconds} seconds.")
518517
s = colab_output.eval_js("record(%d)" % (seconds * 1000))
@@ -525,6 +524,32 @@ def record(seconds=1):
525524
return torchaudio.load(filename)
526525

527526

527+
def record_noncolab(seconds=1):
    """Record mono audio through ``sounddevice`` and load it with torchaudio.

    The capture is written to ``_audio.wav`` in the current working
    directory and then read back, so the return value is whatever
    ``torchaudio.load`` returns for that file.

    Args:
        seconds: Length of the recording, in seconds.

    Returns:
        The result of ``torchaudio.load("_audio.wav")``.
    """
    # Imported lazily so these packages are only required when this
    # recorder is actually called.
    import sounddevice as sd
    import scipy.io.wavfile as wavfile

    sample_rate = 44100
    frame_count = int(seconds * sample_rate)

    print(f"Recording started for {seconds} seconds.")
    captured = sd.rec(frame_count, samplerate=sample_rate, channels=1)
    # Let the capture finish before the buffer is written to disk.
    sd.wait()
    print("Recording ended.")

    filename = "_audio.wav"
    wavfile.write(filename, sample_rate, captured)
    return torchaudio.load(filename)
544+
545+
546+
# Pick the recorder that matches the environment. ``record`` as defined
# above relies on ``google.colab``, so it is kept as-is when the notebook
# runs in Google Colab (where that module is already loaded). Everywhere
# else it is rebound to the sounddevice-based implementation.
# Note: there is no function named ``record_colab`` in this file, so the
# Colab branch must not reference it.
if "google.colab" not in sys.modules:
    record = record_noncolab
551+
552+
528553
waveform, sample_rate = record()
529554
print(f"Predicted: {predict(waveform)}.")
530555
ipd.Audio(waveform.numpy(), rate=sample_rate)

0 commit comments

Comments
 (0)