Skip to content

[I2S][SR] Add new I2S library and Sound Recognition support #8714

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Oct 18, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ set(LIBRARY_SRCS
libraries/BluetoothSerial/src/BTScanResultsSet.cpp
libraries/DNSServer/src/DNSServer.cpp
libraries/EEPROM/src/EEPROM.cpp
libraries/ESP_I2S/src/ESP_I2S.cpp
libraries/ESP_SR/src/ESP_SR.cpp
libraries/ESP_SR/src/esp32-hal-sr.c
libraries/ESPmDNS/src/ESPmDNS.cpp
libraries/Ethernet/src/ETH.cpp
libraries/FFat/src/FFat.cpp
Expand All @@ -91,7 +94,6 @@ set(LIBRARY_SRCS
libraries/HTTPUpdate/src/HTTPUpdate.cpp
libraries/LittleFS/src/LittleFS.cpp
libraries/Insights/src/Insights.cpp
libraries/I2S/src/I2S.cpp
libraries/NetBIOS/src/NetBIOS.cpp
libraries/Preferences/src/Preferences.cpp
libraries/RainMaker/src/RMaker.cpp
Expand Down
21 changes: 21 additions & 0 deletions libraries/ESP_I2S/keywords.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#######################################
# Syntax Coloring Map For ESP_I2S
#######################################

#######################################
# Datatypes (KEYWORD1)
#######################################

ESP_I2S	KEYWORD1
I2SClass	KEYWORD1
i2s_mode_t	KEYWORD1
i2s_rx_transform_t	KEYWORD1

#######################################
# Methods and Functions (KEYWORD2)
#######################################

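setPins	KEYWORD2
setInverted	KEYWORD2
setPinsPdmTx	KEYWORD2
setPinsPdmRx	KEYWORD2
setInvertedPdm	KEYWORD2
begin	KEYWORD2
configureTX	KEYWORD2
configureRX	KEYWORD2
end	KEYWORD2
txChan	KEYWORD2
rxChan	KEYWORD2
lastError	KEYWORD2
recordWAV	KEYWORD2
playWAV	KEYWORD2
playMP3	KEYWORD2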

#######################################
# Constants (LITERAL1)
#######################################

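I2S_MODE_STD	LITERAL1
I2S_MODE_TDM	LITERAL1
I2S_MODE_PDM_TX	LITERAL1
I2S_MODE_PDM_RX	LITERAL1
I2S_RX_TRANSFORM_NONE	LITERAL1
I2S_RX_TRANSFORM_32_TO_16	LITERAL1
I2S_RX_TRANSFORM_16_STEREO_TO_MONO	LITERAL1
I2S_RX_TRANSFORM_MAX	LITERAL1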
9 changes: 9 additions & 0 deletions libraries/ESP_I2S/library.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
name=ESP_I2S
version=1.0.0
author=me-no-dev
maintainer=me-no-dev
sentence=Library for ESP I2S communication
paragraph=Supports ESP32 Arduino platforms.
category=Sound
url=https://github.com/espressif/arduino-esp32/
architectures=esp32
771 changes: 771 additions & 0 deletions libraries/ESP_I2S/src/ESP_I2S.cpp

Large diffs are not rendered by default.

104 changes: 104 additions & 0 deletions libraries/ESP_I2S/src/ESP_I2S.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#pragma once
#include "Arduino.h"
#include "esp_err.h"
#include "driver/i2s_std.h"
#include "driver/i2s_tdm.h"
#include "driver/i2s_pdm.h"
#include "soc/soc_caps.h"

// Pointer to a channel-read routine; I2SClass stores one in its rx_fn member.
// The routine receives the channel handle, a caller-supplied temporary buffer,
// the destination buffer with its size, an out-parameter for the number of
// bytes read, and a timeout in milliseconds, and reports status as esp_err_t.
// NOTE(review): tmp_buf is presumably scratch space for the RX transforms
// (see i2s_rx_transform_t) — confirm against ESP_I2S.cpp.
typedef esp_err_t (*i2s_channel_read_fn)(i2s_chan_handle_t handle, char * tmp_buf, void *dest, size_t size, size_t *bytes_read, uint32_t timeout_ms);

/* Operating mode selected when calling I2SClass::begin(). */
typedef enum {
    I2S_MODE_STD    = 0,  /* standard mode; pins come from setPins() */
    I2S_MODE_TDM    = 1,  /* TDM mode; shares the setPins() pin set */
    I2S_MODE_PDM_TX = 2,  /* PDM transmit; pins come from setPinsPdmTx() */
    I2S_MODE_PDM_RX = 3   /* PDM receive; pins come from setPinsPdmRx() */
} i2s_mode_t;

/* Optional conversion applied to received data; passed to configureRX(). */
typedef enum {
    I2S_RX_TRANSFORM_NONE              = 0,  /* default: no conversion */
    I2S_RX_TRANSFORM_32_TO_16          = 1,  /* by name: 32-bit samples reduced to 16-bit */
    I2S_RX_TRANSFORM_16_STEREO_TO_MONO = 2,  /* by name: 16-bit stereo reduced to mono */
    I2S_RX_TRANSFORM_MAX               = 3   /* sentinel: number of transforms, not a valid choice */
} i2s_rx_transform_t;

// Arduino Stream wrapper around the I2S channel driver headers included above
// (i2s_std / i2s_tdm / i2s_pdm). Typical use, as in the ESP_SR Basic example:
// pick pins with one of the setPins*() calls, then call begin() with a mode,
// sample rate, bit width and slot mode. Method bodies live in ESP_I2S.cpp.
class I2SClass: public Stream {
public:
I2SClass();
~I2SClass();

// Pin selection for STD + TDM mode. din and mclk default to -1; the Basic
// example also passes -1 for dout when only receiving.
// NOTE(review): -1 presumably means "pin not used" — confirm in ESP_I2S.cpp.
void setPins(int8_t bclk, int8_t ws, int8_t dout, int8_t din=-1, int8_t mclk=-1);
// Per-signal inversion flags for STD + TDM mode (mclk defaults to not inverted).
void setInverted(bool bclk, bool ws, bool mclk=false);

// Pin selection and clock inversion for PDM TX + PDM RX mode.
void setPinsPdmTx(int8_t clk, int8_t dout0, int8_t dout1);
void setPinsPdmRx(int8_t clk, int8_t din0, int8_t din1, int8_t din2, int8_t din3);
void setInvertedPdm(bool clk);

// Start the peripheral in the given mode. Note that mode is the FIRST argument.
// Returns a bool status; lastError() exposes the stored esp_err_t.
bool begin(i2s_mode_t mode, uint32_t rate, i2s_data_bit_width_t bits_cfg, i2s_slot_mode_t ch);
// Change TX / RX parameters separately; RX can additionally select a transform.
bool configureTX(uint32_t rate, i2s_data_bit_width_t bits_cfg, i2s_slot_mode_t ch);
bool configureRX(uint32_t rate, i2s_data_bit_width_t bits_cfg, i2s_slot_mode_t ch, i2s_rx_transform_t transform=I2S_RX_TRANSFORM_NONE);
bool end();

// Bulk transfer.
// NOTE(review): this write() takes a non-const uint8_t*, unlike Arduino's
// Print::write(const uint8_t*, size_t) — confirm whether an override was intended.
size_t readBytes(char *buffer, size_t size);
size_t write(uint8_t *buffer, size_t size);

// Accessors for the TX channel handle and its current settings.
i2s_chan_handle_t txChan();
uint32_t txSampleRate();
i2s_data_bit_width_t txDataWidth();
i2s_slot_mode_t txSlotMode();

// Accessors for the RX channel handle and its current settings.
i2s_chan_handle_t rxChan();
uint32_t rxSampleRate();
i2s_data_bit_width_t rxDataWidth();
i2s_slot_mode_t rxSlotMode();

// Last stored error code (the last_error member is an esp_err_t).
int lastError();

// Byte-oriented Stream interface.
int available();
int peek();
int read();
size_t write(uint8_t d);

// Record short PCM WAV to memory with current RX settings. Returns buffer that must be freed by the user.
// The size of the returned buffer is reported through out_size.
uint8_t * recordWAV(size_t rec_seconds, size_t * out_size);
// Play short PCM WAV from memory (data/len describe the in-memory file).
void playWAV(uint8_t * data, size_t len);
// Play short MP3 from memory; returns a bool status.
bool playMP3(uint8_t *src, size_t src_len);


private:
// Error code reported by lastError(), and the mode given to begin().
esp_err_t last_error;
i2s_mode_t _mode;

// TX channel handle and its settings (exposed via the tx*() accessors).
i2s_chan_handle_t tx_chan;
uint32_t tx_sample_rate;
i2s_data_bit_width_t tx_data_bit_width;
i2s_slot_mode_t tx_slot_mode;

// RX read routine, the selected transform and its working buffer.
i2s_channel_read_fn rx_fn;
i2s_rx_transform_t rx_transform;
char * rx_transform_buf;
size_t rx_transform_buf_len;

// RX channel handle and its settings (exposed via the rx*() accessors).
i2s_chan_handle_t rx_chan;
uint32_t rx_sample_rate;
i2s_data_bit_width_t rx_data_bit_width;
i2s_slot_mode_t rx_slot_mode;

//STD and TDM mode pins and inversion flags (see setPins / setInverted)
int8_t _mclk, _bclk, _ws, _dout, _din;
bool _mclk_inv, _bclk_inv, _ws_inv;

//PDM mode pins and inversion flags (see setPinsPdmRx / setPinsPdmTx / setInvertedPdm)
int8_t _rx_clk, _rx_din0, _rx_din1, _rx_din2, _rx_din3; //TODO: soc_caps.h 1/4
bool _rx_clk_inv;
int8_t _tx_clk, _tx_dout0, _tx_dout1;
bool _tx_clk_inv;

// Internal helpers. "allocTranformRX" is spelled this way (sic) and must match
// the definition in ESP_I2S.cpp.
bool allocTranformRX(size_t buf_len);
bool transformRX(i2s_rx_transform_t transform);
// NOTE(review): presumably a detach callback for the peripheral manager — confirm.
static bool i2sDetachBus(void * bus_pointer);
};
91 changes: 91 additions & 0 deletions libraries/ESP_I2S/src/wav_header.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#pragma once
#include <stdint.h>

/**
* @brief Header structure for WAV file with only one data chunk
*
* @note See this for reference: http://soundfile.sapp.org/doc/WaveFormat/
*
* @note Assignment to variables in this struct directly is only possible for little endian architectures
* (including Xtensa & RISC-V)
*/

/* RIFF descriptor: the first 12 bytes of a WAVE file (4 + 4 + 4, packed). */
typedef struct {
char chunk_id[4]; /*!< Contains the letters "RIFF" in ASCII form */
uint32_t chunk_size; /*!< Size of the rest of the file following this field, i.e. total size minus 8 bytes */
char chunk_format[4]; /*!< Contains the letters "WAVE" */
} __attribute__((packed)) wav_descriptor_chunk_t; /*!< Canonical WAVE format starts with the RIFF header */

/* "fmt " subchunk for uncompressed PCM audio: 24 bytes, packed. */
typedef struct {
char subchunk_id[4]; /*!< Contains the letters "fmt " */
uint32_t subchunk_size; /*!< PCM = 16, This is the size of the rest of the Subchunk which follows this number */
uint16_t audio_format; /*!< PCM = 1, values other than 1 indicate some form of compression */
uint16_t num_of_channels; /*!< Mono = 1, Stereo = 2, etc. */
uint32_t sample_rate; /*!< 8000, 44100, etc. */
uint32_t byte_rate; /*!< ==SampleRate * NumChannels * BitsPerSample / 8 */
uint16_t block_align; /*!< ==NumChannels * BitsPerSample / 8 */
uint16_t bits_per_sample; /*!< 8 bits = 8, 16 bits = 16, etc. */
} __attribute__((packed)) pcm_wav_fmt_chunk_t; /*!< The "fmt " subchunk describes the sound data's format */

/* "fmt " subchunk for non-PCM (A-law / mu-law) audio: the PCM layout plus a
 * trailing 2-byte extension size, 26 bytes packed. */
typedef struct {
char subchunk_id[4]; /*!< Contains the letters "fmt " */
uint32_t subchunk_size; /*!< ALAW/MULAW = 18, This is the size of the rest of the Subchunk which follows this number */
uint16_t audio_format; /*!< ALAW = 6, MULAW = 7, values other than 1 indicate some form of compression */
uint16_t num_of_channels; /*!< ALAW/MULAW = 1, Mono = 1, Stereo = 2, etc. */
uint32_t sample_rate; /*!< ALAW/MULAW = 8000, 8000, 44100, etc. */
uint32_t byte_rate; /*!< ALAW/MULAW = 8000, ==SampleRate * NumChannels * BitsPerSample / 8 */
uint16_t block_align; /*!< ALAW/MULAW = 1, ==NumChannels * BitsPerSample / 8 */
uint16_t bits_per_sample; /*!< ALAW/MULAW = 8, 8 bits = 8, 16 bits = 16, etc. */
uint16_t ext_size; /*!< ALAW/MULAW = 0, Size of the extension (0 or 22) */
} __attribute__((packed)) non_pcm_wav_fmt_chunk_t; /*!< The "fmt " subchunk describes the sound data's format */

/* Header of the "data" subchunk (8 bytes, packed); the audio samples follow it. */
typedef struct {
char subchunk_id[4]; /*!< Contains the letters "data" */
uint32_t subchunk_size; /*!< Size of the sample data in bytes, ==NumSamples * NumChannels * BitsPerSample / 8 */
} __attribute__((packed)) wav_data_chunk_t; /*!< The "data" subchunk contains the size of the data and the actual sound */

/* Complete header of a single-data-chunk PCM WAV file.
 * Packed size is 12 + 24 + 8 = 44 bytes, which is what PCM_WAV_HEADER_SIZE states. */
typedef struct {
wav_descriptor_chunk_t descriptor_chunk; /*!< Canonical WAVE format starts with the RIFF header */
pcm_wav_fmt_chunk_t fmt_chunk; /*!< The "fmt " subchunk describes the sound data's format */
wav_data_chunk_t data_chunk; /*!< The "data" subchunk contains the size of the data and the actual sound */
} __attribute__((packed)) pcm_wav_header_t;

/* Complete header of a single-data-chunk non-PCM (A-law / mu-law) WAV file.
 * Packed size is 12 + 26 + 8 = 46 bytes, which is what NON_PCM_WAV_HEADER_SIZE states. */
typedef struct {
wav_descriptor_chunk_t descriptor_chunk; /*!< Canonical WAVE format starts with the RIFF header */
non_pcm_wav_fmt_chunk_t fmt_chunk; /*!< The "fmt " subchunk describes the sound data's format */
wav_data_chunk_t data_chunk; /*!< The "data" subchunk contains the size of the data and the actual sound */
} __attribute__((packed)) non_pcm_wav_header_t;

#define WAVE_FORMAT_PCM 1 // PCM
#define WAVE_FORMAT_IEEE_FLOAT 3 // IEEE float
#define WAVE_FORMAT_ALAW 6 // 8-bit ITU-T G.711 A-law
#define WAVE_FORMAT_MULAW 7 // 8-bit ITU-T G.711 µ-law

#define PCM_WAV_HEADER_SIZE 44
#define NON_PCM_WAV_HEADER_SIZE 46

/**
 * @brief Default header for PCM format WAV files
 *
 * Expands to a braced initializer for a pcm_wav_header_t.
 *
 * @param wav_sample_size Size of the audio payload in bytes (stored in the "data" subchunk size)
 * @param wav_sample_bits Bits per sample (8, 16, ...)
 * @param wav_sample_rate Sample rate in Hz
 * @param wav_channel_num Number of channels (1 = mono, 2 = stereo)
 *
 * @note Every field is cast to its exact width so the macro can be used with
 *       run-time values of any integer type (size_t, int, ...) without
 *       narrowing diagnostics inside the braced initializer.
 */
#define PCM_WAV_HEADER_DEFAULT(wav_sample_size, wav_sample_bits, wav_sample_rate, wav_channel_num) { \
    .descriptor_chunk = { \
        .chunk_id = {'R', 'I', 'F', 'F'}, \
        .chunk_size = (uint32_t)((wav_sample_size) + sizeof(pcm_wav_header_t) - 8), \
        .chunk_format = {'W', 'A', 'V', 'E'} \
    }, \
    .fmt_chunk = { \
        .subchunk_id = {'f', 'm', 't', ' '}, \
        .subchunk_size = 16, /* 16 for PCM */ \
        .audio_format = WAVE_FORMAT_PCM, /* 1 for PCM */ \
        .num_of_channels = (uint16_t)(wav_channel_num), \
        .sample_rate = (uint32_t)(wav_sample_rate), \
        .byte_rate = (uint32_t)((wav_sample_bits) * (wav_sample_rate) * (wav_channel_num) / 8), \
        .block_align = (uint16_t)((wav_sample_bits) * (wav_channel_num) / 8), \
        .bits_per_sample = (uint16_t)(wav_sample_bits) \
    }, \
    .data_chunk = { \
        .subchunk_id = {'d', 'a', 't', 'a'}, \
        .subchunk_size = (uint32_t)(wav_sample_size) \
    } \
}
92 changes: 92 additions & 0 deletions libraries/ESP_SR/examples/Basic/Basic.ino
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@

#include "ESP_I2S.h"
#include "ESP_SR.h"

// I2S input pins, passed to i2s.setPins() in setup() as bclk, ws and din.
#define I2S_PIN_BCK 17
#define I2S_PIN_WS 47
#define I2S_PIN_DIN 16

// Output pins driven HIGH/LOW by the recognized light and fan commands.
#define LIGHT_PIN 40
#define FAN_PIN 41

// I2S instance that setup() configures and hands to ESP_SR.begin().
I2SClass i2s;

// Generated using the following command:
// python3 tools/gen_sr_commands.py "Turn on the light,Switch on the light;Turn off the light,Switch off the light,Go dark;Start fan;Stop fan"
// Command IDs: one enumerator per ';'-separated group above, numbered from 0.
// They match the first column of sr_commands and the command_id passed to onSrEvent().
enum {
SR_CMD_TURN_ON_THE_LIGHT,
SR_CMD_TURN_OFF_THE_LIGHT,
SR_CMD_START_FAN,
SR_CMD_STOP_FAN,
};
// Phrase table handed to ESP_SR.begin(). Several phrases may share one command
// ID (first column). The second column is the spoken phrase, which onSrEvent()
// prints via .str; the third column is the generated phoneme spelling.
// onSrEvent() indexes this array by phrase_id, so row order matters.
static const sr_cmd_t sr_commands[] = {
{ 0, "Turn on the light", "TkN nN jc LiT"},
{ 0, "Switch on the light", "SWgp nN jc LiT"},
{ 1, "Turn off the light", "TkN eF jc LiT"},
{ 1, "Switch off the light", "SWgp eF jc LiT"},
{ 1, "Go dark", "Gb DnRK"},
{ 2, "Start fan", "STnRT FaN"},
{ 3, "Stop fan", "STnP FaN"},
};

/**
 * Sound-recognition event callback, registered through ESP_SR.onEvent().
 *
 * @param event      Which event occurred (wake word, channel verified, command, timeout).
 * @param command_id For SR_EVENT_COMMAND, the command ID (SR_CMD_*); for
 *                   SR_EVENT_WAKEWORD_CHANNEL it is printed as the channel number.
 * @param phrase_id  For SR_EVENT_COMMAND, index of the matched row in sr_commands.
 */
void onSrEvent(sr_event_t event, int command_id, int phrase_id){
  switch(event){
    case SR_EVENT_WAKEWORD:
      Serial.println("WakeWord Detected!");
      break;

    case SR_EVENT_WAKEWORD_CHANNEL:
      Serial.printf("WakeWord Channel %d Verified!\n", command_id);
      ESP_SR.setMode(SR_MODE_COMMAND); // Switch to Command detection
      break;

    case SR_EVENT_TIMEOUT:
      Serial.println("Timeout Detected!");
      ESP_SR.setMode(SR_MODE_WAKEWORD); // Switch back to WakeWord detection
      break;

    case SR_EVENT_COMMAND: {
      // phrase_id comes from outside this sketch: never index the table with
      // it before checking that it actually refers to one of our rows.
      const int phrase_count = (int)(sizeof(sr_commands) / sizeof(sr_commands[0]));
      const char * phrase = "<unknown phrase>";
      if(phrase_id >= 0 && phrase_id < phrase_count){
        phrase = sr_commands[phrase_id].str;
      }
      Serial.printf("Command %d Detected! %s\n", command_id, phrase);

      switch(command_id){
        case SR_CMD_TURN_ON_THE_LIGHT:
          digitalWrite(LIGHT_PIN, HIGH);
          break;
        case SR_CMD_TURN_OFF_THE_LIGHT:
          digitalWrite(LIGHT_PIN, LOW);
          break;
        case SR_CMD_START_FAN:
          digitalWrite(FAN_PIN, HIGH);
          break;
        case SR_CMD_STOP_FAN:
          digitalWrite(FAN_PIN, LOW);
          break;
        default:
          Serial.println("Unknown Command!");
          break;
      }
      ESP_SR.setMode(SR_MODE_COMMAND); // Allow for more commands to be given, before timeout
      // ESP_SR.setMode(SR_MODE_WAKEWORD); // Switch back to WakeWord detection
      break;
    }

    default:
      Serial.println("Unknown Event!");
      break;
  }
}

void setup(){
Serial.begin(115200);

pinMode(LIGHT_PIN, OUTPUT);
digitalWrite(LIGHT_PIN, LOW);
pinMode(FAN_PIN, OUTPUT);
digitalWrite(FAN_PIN, LOW);

i2s.setPins(I2S_PIN_BCK, I2S_PIN_WS, -1, I2S_PIN_DIN);
i2s.setTimeout(1000);
i2s.begin(16000, I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_STEREO);


ESP_SR.onEvent(onSrEvent);
ESP_SR.begin(i2s, sr_commands, sizeof(sr_commands) / sizeof(sr_cmd_t), SR_CHANNELS_STEREO, SR_MODE_WAKEWORD);
}

// Intentionally empty: this sketch reacts to recognition events only through
// the onSrEvent callback registered in setup().
void loop() {}
40 changes: 40 additions & 0 deletions libraries/ESP_SR/keywords.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#######################################
# Syntax Coloring Map For ESP_SR
#######################################

#######################################
# Datatypes (KEYWORD1)
#######################################

ESP_SR KEYWORD1
ESP_SR_Class KEYWORD1
sr_cmd_t KEYWORD1
sr_event_t KEYWORD1
sr_mode_t KEYWORD1
sr_channels_t KEYWORD1
sr_cb KEYWORD1

#######################################
# Methods and Functions (KEYWORD2)
#######################################

onEvent KEYWORD2
setMode KEYWORD2
pause KEYWORD2
resume KEYWORD2

#######################################
# Constants (LITERAL1)
#######################################

SR_EVENT_WAKEWORD LITERAL1
SR_EVENT_WAKEWORD_CHANNEL LITERAL1
SR_EVENT_COMMAND LITERAL1
SR_EVENT_TIMEOUT LITERAL1
SR_MODE_OFF LITERAL1
SR_MODE_WAKEWORD LITERAL1
SR_MODE_COMMAND LITERAL1
SR_MODE_MAX LITERAL1
SR_CHANNELS_MONO LITERAL1
SR_CHANNELS_STEREO LITERAL1
SR_CHANNELS_MAX LITERAL1
9 changes: 9 additions & 0 deletions libraries/ESP_SR/library.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
name=ESP_SR
version=1.0.0
author=me-no-dev
maintainer=me-no-dev
sentence=Library for ESP Sound Recognition
paragraph=Supports ESP32 Arduino platforms.
category=Sound
url=https://github.com/espressif/arduino-esp32/
architectures=esp32
Loading