Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions examples/common/network.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,10 @@ int network_init(const char *ssid, const char *password, network_connect_cb cb)
NULL,
&instance_got_ip));
wifi_config.sta.threshold.authmode = WIFI_AUTH_WPA2_PSK;
if (load_from_nvs()) {
wifi_config.sta.sae_pwe_h2e = WPA3_SAE_PWE_BOTH;
/*if (load_from_nvs()) {
ESP_LOGI(TAG, "Force to use wifi config from nvs");
} else {
} else*/ {
if (ssid) {
memcpy(wifi_config.sta.ssid, ssid, strlen(ssid) + 1);
}
Expand Down
2 changes: 1 addition & 1 deletion examples/voice_agent_lcd/main/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
file(GLOB_RECURSE ALL_SOURCES ./*.c)
file(GLOB_RECURSE ALL_SOURCES ./*.c ./*.cpp)

idf_component_register(SRC_DIRS "." "fonts" "images")

Expand Down
152 changes: 152 additions & 0 deletions examples/voice_agent_lcd/main/audio_render_sink.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#include "av_render.h"
#include "esp_capture.h"

#include "av_render_default.h"
#include "codec_init.h"
#include "esp_audio_dec_default.h"
#include "esp_audio_enc_default.h"
#include "esp_capture_audio_enc.h"
#include "esp_capture_defaults.h"
#include "esp_capture_path_simple.h"
#include "esp_check.h"
#include "esp_log.h"
#include "media_lib_os.h"

#include "audio_render_sink.h"
#include "fft.h"
#include "media.h"

#include "audio_visualizer.h"

/* Handles that together make up the playback pipeline built in this file. */
typedef struct {
/* Audio render handle returned by av_render_alloc_au_render_sink(). */
audio_render_handle_t audio_renderer;
/* Handle returned by av_render_open(); exposed via media_get_renderer(). */
av_render_handle_t av_renderer_handle;
} renderer_system_t;


/* Wrapped I2S render: allocated in au_render_sink_init() and reset to NULL in
 * au_render_sink_close(). All sink callbacks forward to this handle. */
static audio_render_handle_t real_render = NULL;
/* Log tag used by the ESP_LOGx calls in this file. */
static const char *TAG = "au_render_sink";
/* Pipeline handles filled in by build_player_with_sink_system(). */
static renderer_system_t renderer_system;
/* Output format handed to av_render_set_fixed_frame_info(). */
static av_render_audio_frame_info_t frame_info = {
.sample_rate = 16000,
.channel = 2,
.bits_per_sample = 16,
};

static audio_render_handle_t au_render_sink_init(void *cfg, int cfg_size) {
if (cfg_size != sizeof(i2s_render_cfg_t)) {
return NULL;
}

if (real_render == NULL) {
real_render = av_render_alloc_i2s_render((i2s_render_cfg_t *)cfg);
}

audio_visualizer_init();

return (audio_render_handle_t)real_render;
}

/**
 * Sink `open` callback: opens the wrapped I2S render with the given format.
 *
 * @param render  Handle supplied by the framework (unused; the file-scope
 *                wrapped render is used instead).
 * @param info    Frame format forwarded to audio_render_open().
 * @return Result of audio_render_open(), or 0 when no wrapped render exists.
 */
static int au_render_sink_open(audio_render_handle_t render,
                               av_render_audio_frame_info_t *info) {
  (void)render;
  if (real_render == NULL) {
    return 0;
  }
  return audio_render_open(real_render, info);
}

/**
 * Sink `write` callback: feeds a copy of the frame to the visualizer, then
 * forwards the frame to the wrapped I2S render.
 *
 * @param render      Handle supplied by the framework (unused).
 * @param audio_data  Frame to play; must not be NULL.
 * @return 0 when no wrapped render exists, -1 when `audio_data` is NULL,
 *         otherwise the result of audio_render_write() so that playback
 *         errors reach the caller instead of being reported as success.
 */
static int au_render_sink_write(audio_render_handle_t render,
                                av_render_audio_frame_t *audio_data) {
  (void)render;
  if (real_render == NULL) {
    return 0;
  }
  if (audio_data == NULL) {
    return -1;
  }

  /* Visualization is best effort; its result must not affect playback. */
  (void)audio_visualizer_processing(audio_data->data, audio_data->size);

  return audio_render_write(real_render, audio_data);
}

/**
 * Sink `get_latency` callback: forwards to the wrapped I2S render.
 *
 * @param render   Handle supplied by the framework (unused).
 * @param latency  Output pointer passed through to audio_render_get_latency().
 * @return Result of audio_render_get_latency().
 */
static int au_render_sink_get_latency(audio_render_handle_t render,
                                      uint32_t *latency) {
  (void)render;
  const int status = audio_render_get_latency(real_render, latency);
  return status;
}

/**
 * Sink `get_frame_info` callback: forwards to the wrapped I2S render.
 *
 * @param render  Handle supplied by the framework (unused).
 * @param info    Output pointer passed through to
 *                audio_render_get_frame_info().
 * @return Result of audio_render_get_frame_info().
 */
static int au_render_sink_get_frame_info(audio_render_handle_t render,
                                         av_render_audio_frame_info_t *info) {
  (void)render;
  const int status = audio_render_get_frame_info(real_render, info);
  return status;
}

/**
 * Sink `set_speed` callback: forwards to the wrapped I2S render.
 *
 * @param render  Handle supplied by the framework (unused).
 * @param speed   Playback speed passed through to audio_render_set_speed().
 * @return Result of audio_render_set_speed().
 */
static int au_render_sink_set_speed(audio_render_handle_t render, float speed) {
  (void)render;
  const int status = audio_render_set_speed(real_render, speed);
  return status;
}

/**
 * Sink `close` callback: closes the wrapped I2S render, forgets its handle and
 * shuts the audio visualizer down.
 *
 * @param render  Handle supplied by the framework (unused).
 * @return Result of audio_render_close(), or 0 when no wrapped render exists.
 *
 * NOTE(review): the wrapped handle is only set to NULL here, not released;
 * confirm whether the render API expects an explicit free.
 */
static int au_render_sink_close(audio_render_handle_t render) {
  (void)render;
  if (real_render == NULL) {
    return 0;
  }

  const int status = audio_render_close(real_render);
  if (status != 0) {
    ESP_LOGE(TAG, "Failed to close render: %d", status);
  }
  /* The handle is dropped even when closing reported an error. */
  real_render = NULL;

  audio_visualizer_deinit();
  ESP_LOGI(TAG, "Audio render sink closed");
  return status;
}

/**
 * Builds an audio render handle whose callbacks are the au_render_sink_*
 * functions in this file.
 *
 * @param i2s_cfg  I2S render configuration; passed as the opaque `cfg` that
 *                 au_render_sink_init() later receives.
 * @return Handle from audio_render_alloc_handle().
 */
static audio_render_handle_t
av_render_alloc_au_render_sink(i2s_render_cfg_t *i2s_cfg) {
  audio_render_cfg_t sink_cfg = {
      .cfg = i2s_cfg,
      .cfg_size = sizeof(i2s_render_cfg_t),
      .ops.init = au_render_sink_init,
      .ops.open = au_render_sink_open,
      .ops.write = au_render_sink_write,
      .ops.close = au_render_sink_close,
      .ops.get_latency = au_render_sink_get_latency,
      .ops.get_frame_info = au_render_sink_get_frame_info,
      .ops.set_speed = au_render_sink_set_speed,
  };

  audio_render_handle_t handle = audio_render_alloc_handle(&sink_cfg);
  return handle;
}

/**
 * Creates the playback pipeline: an audio render sink wrapping the I2S render,
 * and an av_render instance that drives it.
 *
 * The resulting handles are stored in the file-scope `renderer_system`; the
 * av_render handle can be retrieved with media_get_renderer().
 *
 * @return 0 on success, -1 when the audio render or the player could not be
 *         created.
 */
int build_player_with_sink_system(void) {
  i2s_render_cfg_t i2s_cfg = {
      .play_handle = get_playback_handle(),
  };

  renderer_system.audio_renderer = av_render_alloc_au_render_sink(&i2s_cfg);
  if (renderer_system.audio_renderer == NULL) {
    ESP_LOGE(TAG, "Fail to create audio render");
    return -1;
  }
  esp_codec_dev_set_out_vol(i2s_cfg.play_handle, CONFIG_DEFAULT_PLAYBACK_VOL);

  av_render_cfg_t render_cfg = {
      .audio_render = renderer_system.audio_renderer,
      .audio_raw_fifo_size = 8 * 4096,
      .audio_render_fifo_size = 100 * 1024,
      .allow_drop_data = false,
  };

  renderer_system.av_renderer_handle = av_render_open(&render_cfg);
  if (renderer_system.av_renderer_handle == NULL) {
    /* NOTE(review): the audio render allocated above is not released on this
     * path; confirm which API frees it and call it here. */
    ESP_LOGE(TAG, "Fail to create player");
    return -1;
  }

  /* With AEC enabled, the ES8311 reference signal is taken from the speaker's
   * right channel, so the output must always be 2-channel. */
  av_render_set_fixed_frame_info(renderer_system.av_renderer_handle,
                                 &frame_info);
  return 0;
}

/**
 * Returns the av_render handle stored by build_player_with_sink_system().
 *
 * @return The stored handle; NULL if the player has not been created (the
 *         backing storage is a zero-initialised static).
 */
av_render_handle_t media_get_renderer(void) {
  av_render_handle_t handle = renderer_system.av_renderer_handle;
  return handle;
}
3 changes: 3 additions & 0 deletions examples/voice_agent_lcd/main/audio_render_sink.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#pragma once

/**
 * Creates the playback pipeline (audio render sink plus av_render player).
 *
 * @return 0 on success, -1 on failure.
 */
int build_player_with_sink_system(void);
117 changes: 117 additions & 0 deletions examples/voice_agent_lcd/main/audio_visualizer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#include "audio_visualizer.h"
#include "esp_log.h"
#include "fft.h"
#include "media_lib_os.h"

#include "freertos/FreeRTOS.h"
#include "freertos/semphr.h"
#include "freertos/task.h"

#include <atomic>
#include <cstdlib>
#include <mutex>
#include <queue>
#include <utility>
#include <vector>

static const char *TAG = "audio_visualizer";
static std::queue<std::vector<uint8_t>> audio_data_queue;
static media_lib_thread_handle_t thread;
static fft_processor_t *fft_processor = NULL;
static bool running = false;
static bool fft_processor_initialized = false;
static SemaphoreHandle_t sem;

static void fft_processor_thread(void *arg);

int audio_visualizer_init(void) {
ESP_LOGI(TAG, "Audio visualizer initialized");

if (!fft_processor) {
fft_processor = (fft_processor_t *)malloc(sizeof(fft_processor_t));
memset(fft_processor, 0, sizeof(fft_processor_t));
// Initialize FFT processor
esp_err_t ret = fft_processor_init(fft_processor, 1024, FFT_WINDOW_HANNING);
if (ret != ESP_OK) {
ESP_LOGE(TAG, "Failed to initialize FFT processor: %s",
esp_err_to_name(ret));
}
fft_processor_initialized = ret == ESP_OK;

sem = xSemaphoreCreateBinary();
if (sem == NULL) {
ESP_LOGE(TAG, "Failed to create semaphore");
return -1;
}
running = true;
// Create a thread for processing audio data
media_lib_thread_create_from_scheduler(&thread, "fft_render",
fft_processor_thread, NULL);
}
return 0;
}

static void fft_processor_thread(void *arg) {
while (running) {
xSemaphoreTake(sem, pdMS_TO_TICKS(1000));
while (!audio_data_queue.empty()) {
auto audio_data = audio_data_queue.front();
audio_data_queue.pop();
if (fft_processor_initialized) {
// Apply FFT processing
fft_result_t *fft_result = fft_processor_process(
fft_processor, (const int16_t *)audio_data.data());
if (fft_result) {
// Process FFT result if needed
// For example, you can log or analyze the magnitudes
ESP_LOGE(TAG, "FFT result length: %d", fft_result->length);

fft_compute_bands_result_t *bands =
fft_result_compute_bands(fft_result, 0, 8000, 5, 16000);

if (bands) {
// Process frequency bands if needed
ESP_LOGI(TAG, "FFT bands length: %d", bands->count);
for (int i = 0; i < bands->count; i++) {
ESP_LOGE(TAG, "Band %d: magnitude=%.2f, frequency=%.2f", i,
bands->magnitudes[i], bands->frequencies[i]);
}
// Free the bands result after processing
fft_compute_bands_result_free(bands);
}

fft_result_free(fft_result);
} else {
ESP_LOGE(TAG, "FFT processing failed");
}
}
}
}
}

int audio_visualizer_processing(uint8_t *audio_data, uint32_t data_size) {

audio_data_queue.push(
std::vector<uint8_t>(audio_data, audio_data + data_size));

if (sem) {
xSemaphoreGive(sem);
} else {
ESP_LOGE(TAG, "Semaphore not initialized");
}

return 0;
}

int audio_visualizer_deinit(void) {
if (fft_processor) {
fft_processor_deinit(fft_processor);
fft_processor_initialized = false;
ESP_LOGI(TAG, "FFT processor deinitialized");
}
// close the thread
if (thread) {
running = false;
xSemaphoreGive(sem);
media_lib_thread_destroy(thread);
thread = NULL;
}
return 0;
}
17 changes: 17 additions & 0 deletions examples/voice_agent_lcd/main/audio_visualizer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Sets up the audio visualizer: FFT processor, wake-up semaphore and the
 * worker thread that analyses queued audio. Calling it again while already
 * set up does nothing.
 *
 * @return 0 on success, -1 when a required resource could not be created.
 */
int audio_visualizer_init(void);

/**
 * Copies `data_size` bytes starting at `audio_data` into an internal queue and
 * signals the worker thread.
 *
 * @param audio_data  Start of the audio bytes to copy.
 * @param data_size   Number of bytes to copy.
 * @return 0 on success.
 */
int audio_visualizer_processing(uint8_t *audio_data, uint32_t data_size);

/**
 * Deinitialises the FFT processor and stops the worker thread.
 *
 * @return 0.
 */
int audio_visualizer_deinit(void);

#ifdef __cplusplus
}
#endif
Loading
Loading