livekit · cloudwebrtc · Jul 29, 2025 · Jul 29, 2025 · Jul 29, 2025 · Jul 30, 2025
diff --git a/examples/common/network.c b/examples/common/network.c
@@ -145,9 +145,10 @@ int network_init(const char *ssid, const char *password, network_connect_cb cb)
                                                         NULL,
                                                         &instance_got_ip));
     wifi_config.sta.threshold.authmode = WIFI_AUTH_WPA2_PSK;
-    if (load_from_nvs()) {
+    wifi_config.sta.sae_pwe_h2e = WPA3_SAE_PWE_BOTH;
+    /*if (load_from_nvs()) {
         ESP_LOGI(TAG, "Force to use wifi config from nvs");
-    } else {
+    } else*/ {
         if (ssid) {
             memcpy(wifi_config.sta.ssid, ssid, strlen(ssid) + 1);
         }

diff --git a/examples/voice_agent_lcd/main/CMakeLists.txt b/examples/voice_agent_lcd/main/CMakeLists.txt
@@ -1,4 +1,4 @@
-file(GLOB_RECURSE ALL_SOURCES ./*.c)
+file(GLOB_RECURSE ALL_SOURCES ./*.c ./*.cpp)
 
 idf_component_register(SRC_DIRS "." "fonts" "images")
 

diff --git a/examples/voice_agent_lcd/main/audio_render_sink.c b/examples/voice_agent_lcd/main/audio_render_sink.c
@@ -0,0 +1,152 @@
+#include "av_render.h"
+#include "esp_capture.h"
+
+#include "av_render_default.h"
+#include "codec_init.h"
+#include "esp_audio_dec_default.h"
+#include "esp_audio_enc_default.h"
+#include "esp_capture_audio_enc.h"
+#include "esp_capture_defaults.h"
+#include "esp_capture_path_simple.h"
+#include "esp_check.h"
+#include "esp_log.h"
+#include "media_lib_os.h"
+
+#include "audio_render_sink.h"
+#include "fft.h"
+#include "media.h"
+
+#include "audio_visualizer.h"
+
+typedef struct {  // Handles created by build_player_with_sink_system().
+  audio_render_handle_t audio_renderer;  // result of av_render_alloc_au_render_sink()
+  av_render_handle_t av_renderer_handle;  // result of av_render_open(); returned by media_get_renderer()
+} renderer_system_t;
+
+
+static audio_render_handle_t real_render = NULL;  // I2S render the sink callbacks forward to; allocated in au_render_sink_init()
+static const char *TAG = "au_render_sink";  // log tag used by the ESP_LOGx calls in this file
+static renderer_system_t renderer_system;  // filled in by build_player_with_sink_system()
+static av_render_audio_frame_info_t frame_info = {  // format handed to av_render_set_fixed_frame_info()
+    .sample_rate = 16000,
+    .channel = 2,  // two channels; see the AEC remark in build_player_with_sink_system()
+    .bits_per_sample = 16,
+};
+
+/**
+ * Render `init` callback: creates (or reuses) the underlying I2S render and
+ * starts the audio visualizer.
+ *
+ * @param cfg       Pointer to an i2s_render_cfg_t.
+ * @param cfg_size  Size of *cfg in bytes; must equal sizeof(i2s_render_cfg_t).
+ * @return The I2S render handle, or NULL when the configuration is invalid or
+ *         the I2S render could not be allocated.
+ */
+static audio_render_handle_t au_render_sink_init(void *cfg, int cfg_size) {
+  if (cfg == NULL || cfg_size != (int)sizeof(i2s_render_cfg_t)) {
+    return NULL;
+  }
+
+  if (real_render == NULL) {
+    real_render = av_render_alloc_i2s_render((i2s_render_cfg_t *)cfg);
+    if (real_render == NULL) {
+      // No render means no audio will ever be written, so there is nothing
+      // for the visualizer to analyse either.
+      ESP_LOGE(TAG, "Fail to allocate I2S render");
+      return NULL;
+    }
+  }
+
+  // The visualizer is an add-on: playback must keep working without it.
+  if (audio_visualizer_init() != 0) {
+    ESP_LOGW(TAG, "Audio visualizer could not be started");
+  }
+
+  return real_render;
+}
+
+/**
+ * Render `open` callback: opens the underlying I2S render with the given
+ * frame format.
+ *
+ * @param render  Handle passed in by the render framework (unused; the
+ *                file-scope real_render is used instead).
+ * @param info    Audio frame format to open the render with.
+ * @return Result of audio_render_open(), or -1 when no underlying render
+ *         exists (for example after au_render_sink_close()).
+ */
+static int au_render_sink_open(audio_render_handle_t render,
+                               av_render_audio_frame_info_t *info) {
+  (void)render;
+  if (real_render == NULL) {
+    // Reporting success here would make every later write a silent no-op.
+    ESP_LOGE(TAG, "Open requested without an underlying render");
+    return -1;
+  }
+  return audio_render_open(real_render, info);
+}
+
+/**
+ * Render `write` callback: hands the frame to the visualizer, then forwards
+ * it to the underlying I2S render.
+ *
+ * @param render      Handle passed in by the render framework (unused).
+ * @param audio_data  Frame to play; its data/size fields are also passed to
+ *                    audio_visualizer_processing().
+ * @return Result of audio_render_write(); 0 when there is no underlying
+ *         render (the frame is dropped); -1 when audio_data is NULL.
+ */
+static int au_render_sink_write(audio_render_handle_t render,
+                                av_render_audio_frame_t *audio_data) {
+  (void)render;
+  if (real_render == NULL) {
+    return 0;
+  }
+  if (audio_data == NULL) {
+    return -1;
+  }
+
+  // Visualisation is best effort; its result must not affect playback.
+  audio_visualizer_processing(audio_data->data, audio_data->size);
+
+  // Propagate the render's status so write failures reach the caller.
+  return audio_render_write(real_render, audio_data);
+}
+
+/**
+ * Render `get_latency` callback: queries the underlying I2S render.
+ *
+ * @param render   Handle passed in by the render framework (unused).
+ * @param latency  Output location forwarded to audio_render_get_latency().
+ * @return Result of audio_render_get_latency(), or -1 when no underlying
+ *         render exists.
+ */
+static int au_render_sink_get_latency(audio_render_handle_t render,
+                                      uint32_t *latency) {
+  (void)render;
+  // Same guard as open/write/close: never forward a NULL render handle.
+  if (real_render == NULL) {
+    return -1;
+  }
+  return audio_render_get_latency(real_render, latency);
+}
+
+/**
+ * Render `get_frame_info` callback: queries the underlying I2S render.
+ *
+ * @param render  Handle passed in by the render framework (unused).
+ * @param info    Output location forwarded to audio_render_get_frame_info().
+ * @return Result of audio_render_get_frame_info(), or -1 when no underlying
+ *         render exists.
+ */
+static int au_render_sink_get_frame_info(audio_render_handle_t render,
+                                         av_render_audio_frame_info_t *info) {
+  (void)render;
+  // Same guard as open/write/close: never forward a NULL render handle.
+  if (real_render == NULL) {
+    return -1;
+  }
+  return audio_render_get_frame_info(real_render, info);
+}
+
+/**
+ * Render `set_speed` callback: forwards the playback speed to the underlying
+ * I2S render.
+ *
+ * @param render  Handle passed in by the render framework (unused).
+ * @param speed   Speed value forwarded to audio_render_set_speed().
+ * @return Result of audio_render_set_speed(), or -1 when no underlying render
+ *         exists.
+ */
+static int au_render_sink_set_speed(audio_render_handle_t render, float speed) {
+  (void)render;
+  // Same guard as open/write/close: never forward a NULL render handle.
+  if (real_render == NULL) {
+    return -1;
+  }
+  return audio_render_set_speed(real_render, speed);
+}
+
+/**
+ * Render `close` callback: closes the underlying I2S render, forgets its
+ * handle and shuts the visualizer down.
+ *
+ * @param render  Handle passed in by the render framework (unused).
+ * @return 0 when there was nothing to close, otherwise the result of
+ *         audio_render_close().
+ */
+static int au_render_sink_close(audio_render_handle_t render) {
+  (void)render;
+
+  if (real_render == NULL) {
+    return 0;
+  }
+
+  const int status = audio_render_close(real_render);
+  if (status != 0) {
+    ESP_LOGE(TAG, "Failed to close render: %d", status);
+  }
+
+  // NOTE(review): the handle is only forgotten here, not released; confirm
+  // who frees the render returned by av_render_alloc_i2s_render().
+  real_render = NULL;
+
+  audio_visualizer_deinit();
+  ESP_LOGI(TAG, "Audio render sink closed");
+  return status;
+}
+
+/**
+ * Allocates an audio render whose callbacks are the au_render_sink_* functions
+ * in this file.
+ *
+ * @param i2s_cfg  I2S render configuration handed to the `init` callback.
+ * @return Handle returned by audio_render_alloc_handle().
+ */
+static audio_render_handle_t
+av_render_alloc_au_render_sink(i2s_render_cfg_t *i2s_cfg) {
+  audio_render_cfg_t sink_cfg = {
+      .ops.init = au_render_sink_init,
+      .ops.open = au_render_sink_open,
+      .ops.write = au_render_sink_write,
+      .ops.get_latency = au_render_sink_get_latency,
+      .ops.set_speed = au_render_sink_set_speed,
+      .ops.get_frame_info = au_render_sink_get_frame_info,
+      .ops.close = au_render_sink_close,
+      .cfg = i2s_cfg,
+      .cfg_size = sizeof(i2s_render_cfg_t),
+  };
+
+  audio_render_handle_t sink = audio_render_alloc_handle(&sink_cfg);
+  return sink;
+}
+
+/**
+ * Creates the sink audio render and the av_render player on top of it, and
+ * stores both handles in renderer_system.
+ *
+ * @return 0 on success, -1 when either the audio render or the player could
+ *         not be created.
+ */
+int build_player_with_sink_system() {
+  i2s_render_cfg_t i2s_cfg = {
+      .play_handle = get_playback_handle(),
+  };
+
+  audio_render_handle_t sink = av_render_alloc_au_render_sink(&i2s_cfg);
+  renderer_system.audio_renderer = sink;
+  if (!sink) {
+    ESP_LOGE(TAG, "Fail to create audio render");
+    return -1;
+  }
+
+  esp_codec_dev_set_out_vol(i2s_cfg.play_handle, CONFIG_DEFAULT_PLAYBACK_VOL);
+
+  av_render_cfg_t player_cfg = {
+      .audio_render = sink,
+      .audio_raw_fifo_size = 8 * 4096,
+      .audio_render_fifo_size = 100 * 1024,
+      .allow_drop_data = false,
+  };
+
+  av_render_handle_t player = av_render_open(&player_cfg);
+  renderer_system.av_renderer_handle = player;
+  if (!player) {
+    // NOTE(review): the audio render created above is not released on this
+    // path; confirm whether that is intended.
+    ESP_LOGE(TAG, "Fail to create player");
+    return -1;
+  }
+
+  // With AEC enabled the ES8311 takes its reference signal from the speaker's
+  // right channel, so the output is always forced to two channels.
+  av_render_set_fixed_frame_info(player, &frame_info);
+  return 0;
+}
+
+/**
+ * Returns the av_render handle stored by build_player_with_sink_system()
+ * (NULL-initialised file-scope storage until that call has set it).
+ */
+av_render_handle_t media_get_renderer(void) {
+  av_render_handle_t player = renderer_system.av_renderer_handle;
+  return player;
+}
diff --git a/examples/voice_agent_lcd/main/audio_render_sink.h b/examples/voice_agent_lcd/main/audio_render_sink.h
@@ -0,0 +1,3 @@
+#pragma once
+
+/**
+ * Creates the sink audio render and the av_render player built on it.
+ *
+ * @return 0 on success, -1 when the audio render or the player could not be
+ *         created.
+ */
+int build_player_with_sink_system(void);
diff --git a/examples/voice_agent_lcd/main/audio_visualizer.cpp b/examples/voice_agent_lcd/main/audio_visualizer.cpp
@@ -0,0 +1,117 @@
+#include "audio_visualizer.h"
+#include "esp_log.h"
+#include "fft.h"
+#include "media_lib_os.h"
+
+#include "freertos/FreeRTOS.h"
+#include "freertos/semphr.h"
+#include "freertos/task.h"
+
+#include <atomic>
+#include <cstdlib>
+#include <cstring>
+#include <mutex>
+#include <queue>
+#include <utility>
+#include <vector>
+
+static const char *TAG = "audio_visualizer";
+
+// Number of 16-bit samples the FFT processor is configured for. Queued chunks
+// shorter than this are zero-padded before being processed.
+static const int FFT_SAMPLE_COUNT = 1024;
+// Upper bound on pending chunks; when the worker falls behind, the oldest
+// chunk is dropped so the heap cannot grow without limit.
+static const size_t MAX_QUEUED_CHUNKS = 16;
+
+// Guards audio_data_queue, which is filled by audio_visualizer_processing()
+// and drained by fft_processor_thread() on a different task.
+static std::mutex queue_mutex;
+static std::queue<std::vector<uint8_t>> audio_data_queue;
+static media_lib_thread_handle_t thread;
+// Non-NULL exactly while the visualizer is fully initialised.
+static fft_processor_t *fft_processor = NULL;
+// Set by init, cleared by deinit; read by the producer and the worker.
+static std::atomic<bool> running(false);
+// Given when new audio is queued or when the worker must re-check `running`.
+static SemaphoreHandle_t sem;
+// Given by the worker as its last action so deinit knows it has finished.
+static SemaphoreHandle_t exit_sem;
+
+static void fft_processor_thread(void *arg);
+
+// Frees the FFT processor, both semaphores and any queued audio. Must only be
+// called while the worker thread is not running.
+static void release_resources(void) {
+  if (fft_processor) {
+    fft_processor_deinit(fft_processor);
+    // NOTE(review): assumes fft_processor_deinit() releases only the
+    // processor's internal buffers and not the struct allocated in
+    // audio_visualizer_init() - confirm against fft.h.
+    free(fft_processor);
+    fft_processor = NULL;
+  }
+  if (sem) {
+    vSemaphoreDelete(sem);
+    sem = NULL;
+  }
+  if (exit_sem) {
+    vSemaphoreDelete(exit_sem);
+    exit_sem = NULL;
+  }
+  std::lock_guard<std::mutex> lock(queue_mutex);
+  std::queue<std::vector<uint8_t>>().swap(audio_data_queue);
+}
+
+/**
+ * Allocates the FFT processor and starts the "fft_render" worker thread.
+ * Calling it again while already initialised is a no-op.
+ *
+ * @return 0 on success (or when already initialised), -1 when any resource
+ *         could not be created; in that case nothing is left allocated.
+ */
+int audio_visualizer_init(void) {
+  if (fft_processor) {
+    return 0;
+  }
+
+  fft_processor_t *processor =
+      (fft_processor_t *)malloc(sizeof(fft_processor_t));
+  if (processor == NULL) {
+    ESP_LOGE(TAG, "Failed to allocate FFT processor");
+    return -1;
+  }
+  memset(processor, 0, sizeof(fft_processor_t));
+
+  esp_err_t ret =
+      fft_processor_init(processor, FFT_SAMPLE_COUNT, FFT_WINDOW_HANNING);
+  if (ret != ESP_OK) {
+    ESP_LOGE(TAG, "Failed to initialize FFT processor: %s",
+             esp_err_to_name(ret));
+    free(processor);
+    return -1;
+  }
+  fft_processor = processor;
+
+  sem = xSemaphoreCreateBinary();
+  exit_sem = xSemaphoreCreateBinary();
+  if (sem == NULL || exit_sem == NULL) {
+    ESP_LOGE(TAG, "Failed to create semaphore");
+    release_resources();
+    return -1;
+  }
+
+  running = true;
+  // Create a thread for processing audio data
+  if (media_lib_thread_create_from_scheduler(&thread, "fft_render",
+                                             fft_processor_thread,
+                                             NULL) != 0) {
+    ESP_LOGE(TAG, "Failed to create FFT thread");
+    running = false;
+    thread = NULL;
+    release_resources();
+    return -1;
+  }
+
+  ESP_LOGI(TAG, "Audio visualizer initialized");
+  return 0;
+}
+
+// Runs one FFT pass over `chunk` (at least FFT_SAMPLE_COUNT samples long) and
+// logs the resulting frequency bands.
+// NOTE(review): the sink feeds 2-channel audio; if the samples are
+// interleaved, the spectrum mixes both channels - confirm this is acceptable.
+static void process_chunk(const std::vector<uint8_t> &chunk) {
+  fft_result_t *fft_result =
+      fft_processor_process(fft_processor, (const int16_t *)chunk.data());
+  if (!fft_result) {
+    ESP_LOGE(TAG, "FFT processing failed");
+    return;
+  }
+
+  // Informational output, so it is logged at info level rather than error.
+  ESP_LOGI(TAG, "FFT result length: %d", fft_result->length);
+
+  fft_compute_bands_result_t *bands =
+      fft_result_compute_bands(fft_result, 0, 8000, 5, 16000);
+  if (bands) {
+    ESP_LOGI(TAG, "FFT bands length: %d", bands->count);
+    for (int i = 0; i < bands->count; i++) {
+      ESP_LOGI(TAG, "Band %d: magnitude=%.2f, frequency=%.2f", i,
+               bands->magnitudes[i], bands->frequencies[i]);
+    }
+    fft_compute_bands_result_free(bands);
+  }
+
+  fft_result_free(fft_result);
+}
+
+// Worker task: waits for queued audio, runs the FFT on every pending chunk and
+// exits once `running` is cleared.
+static void fft_processor_thread(void *arg) {
+  (void)arg;
+  const size_t fft_bytes = (size_t)FFT_SAMPLE_COUNT * sizeof(int16_t);
+
+  while (running) {
+    // The timeout makes the loop re-check `running` even without new audio.
+    xSemaphoreTake(sem, pdMS_TO_TICKS(1000));
+
+    while (running) {
+      std::vector<uint8_t> chunk;
+      {
+        // Hold the lock only while detaching one chunk, never during the FFT.
+        std::lock_guard<std::mutex> lock(queue_mutex);
+        if (audio_data_queue.empty()) {
+          break;
+        }
+        chunk = std::move(audio_data_queue.front());
+        audio_data_queue.pop();
+      }
+
+      // fft_processor_process() takes no length argument, so it presumably
+      // reads FFT_SAMPLE_COUNT samples; pad short chunks with silence rather
+      // than letting it read past the end of the buffer.
+      if (chunk.size() < fft_bytes) {
+        chunk.resize(fft_bytes, 0);
+      }
+      process_chunk(chunk);
+    }
+  }
+
+  // Tell audio_visualizer_deinit() that shared state is no longer touched.
+  xSemaphoreGive(exit_sem);
+  // NOTE(review): assumes media_lib_thread_destroy(NULL) ends the calling
+  // thread, as a task function must not simply return - confirm against
+  // media_lib_os.h.
+  media_lib_thread_destroy(NULL);
+}
+
+/**
+ * Queues a copy of an audio buffer for the FFT worker thread.
+ *
+ * Must not be called concurrently with audio_visualizer_deinit().
+ *
+ * @param audio_data  Start of the audio bytes to copy.
+ * @param data_size   Number of bytes to copy.
+ * @return 0 when the data was queued, -1 when the arguments are invalid or
+ *         the visualizer is not running.
+ */
+int audio_visualizer_processing(uint8_t *audio_data, uint32_t data_size) {
+  if (audio_data == NULL || data_size == 0) {
+    return -1;
+  }
+  if (!running || sem == NULL) {
+    // Without a worker nothing would ever drain the queue.
+    ESP_LOGE(TAG, "Semaphore not initialized");
+    return -1;
+  }
+
+  {
+    std::lock_guard<std::mutex> lock(queue_mutex);
+    if (audio_data_queue.size() >= MAX_QUEUED_CHUNKS) {
+      // Prefer fresh audio for display: discard the oldest pending chunk.
+      audio_data_queue.pop();
+    }
+    audio_data_queue.push(
+        std::vector<uint8_t>(audio_data, audio_data + data_size));
+  }
+
+  xSemaphoreGive(sem);
+  return 0;
+}
+
+/**
+ * Stops the worker thread and releases everything audio_visualizer_init()
+ * created, so that the visualizer can be initialised again later.
+ *
+ * @return Always 0.
+ */
+int audio_visualizer_deinit(void) {
+  if (fft_processor == NULL) {
+    // Never initialised, or already torn down.
+    return 0;
+  }
+
+  // Stop the worker before touching anything it uses.
+  running = false;
+  if (thread) {
+    xSemaphoreGive(sem);
+    xSemaphoreTake(exit_sem, portMAX_DELAY);
+    thread = NULL;
+  }
+
+  release_resources();
+  ESP_LOGI(TAG, "FFT processor deinitialized");
+  return 0;
+}
diff --git a/examples/voice_agent_lcd/main/audio_visualizer.h b/examples/voice_agent_lcd/main/audio_visualizer.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int audio_visualizer_init(void);  // Sets up the FFT processor and starts the "fft_render" worker thread; returns 0, or -1 when a required resource cannot be created.
+
+int audio_visualizer_processing(uint8_t *audio_data, uint32_t data_size);  // Copies data_size bytes from audio_data into the queue consumed by the worker thread; returns 0 when queued.
+
+int audio_visualizer_deinit(void);  // Deinitializes the FFT processor and stops the worker thread; returns 0.
+
+#ifdef __cplusplus
+}
+#endif
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		#pragma once

		int build_player_with_sink_system();