Skip to content

Commit 11179f8

Browse files
authored
Video publishing support (#44)
1 parent fc7305a commit 11179f8

File tree

22 files changed

+697
-79
lines changed

22 files changed

+697
-79
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ Use this SDK to add realtime video, audio and data features to your ESP32 projec
2424

2525
- **Supported chips**: ESP32-S3 and ESP32-P4
2626
- **Bidirectional audio**: Opus encoding, acoustic echo cancellation (AEC)
27-
- **Bidirectional video**: *video support coming soon*
27+
- **Video publishing**: H.264 encoding, subscribing coming soon
2828
- **AI Agents**: interact with agents in the cloud built with [LiveKit Agents](https://docs.livekit.io/agents/)
2929
- **Real-time data**: data packets, remote method calls (RPC)
3030

@@ -38,6 +38,7 @@ One of the best ways to get started with LiveKit is by reviewing the [examples](
3838

3939
- [**Voice AI Agent**](./components/livekit/examples/voice_agent/README.md): Conversational AI voice agent that interacts with hardware based on user requests.
4040
- [**Minimal**](./components/livekit/examples/minimal/README.md): Basic example of connecting to a LiveKit room with bidirectional audio.
41+
- [**Minimal Video**](./components/livekit/examples/minimal_video/README.md): Equivalent to the minimal example with video publishing.
4142

4243
Once you have chosen an example to be your starting point, create a fresh project from it locally using the following command:
4344

components/livekit/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@ Use this SDK to add realtime video, audio and data features to your ESP32 projec
1010

1111
- **Supported chips**: ESP32-S3 and ESP32-P4
1212
- **Bidirectional audio**: Opus encoding, acoustic echo cancellation (AEC)
13-
- **Bidirectional video**: *video support coming soon*
13+
- **Video publishing**: H.264 encoding, subscribing coming soon
1414
- **AI Agents**: interact with agents in the cloud built with [LiveKit Agents](https://docs.livekit.io/agents/)
1515
- **Real-time data**: data packets, remote method calls (RPC)

components/livekit/core/engine.c

Lines changed: 62 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -329,9 +329,11 @@ static engine_err_t send_add_video_track(engine_t *eng)
329329
.type = LIVEKIT_PB_TRACK_TYPE_VIDEO,
330330
.source = LIVEKIT_PB_TRACK_SOURCE_CAMERA,
331331
.muted = false,
332+
.width = video_layer.width,
333+
.height = video_layer.height,
332334
.layers_count = 1,
333335
.layers = { video_layer },
334-
.audio_features_count = 0
336+
.backup_codec_policy = LIVEKIT_PB_BACKUP_CODEC_POLICY_REGRESSION
335337
};
336338

337339
if (signal_send_add_track(eng->signal_handle, &req) != SIGNAL_ERR_NONE) {
@@ -341,36 +343,21 @@ static engine_err_t send_add_video_track(engine_t *eng)
341343
return ENGINE_ERR_NONE;
342344
}
343345

344-
/// Begins media streaming and sends add track requests.
345-
static engine_err_t publish_tracks(engine_t *eng)
346+
/// Send add track requests based on the media options.
347+
///
348+
/// Note: SFU expects add track request before publisher peer offer is sent.
349+
///
350+
static engine_err_t send_add_track_requests(engine_t *eng)
346351
{
347-
if (eng->options.media.audio_info.codec == ESP_PEER_AUDIO_CODEC_NONE &&
348-
eng->options.media.video_info.codec == ESP_PEER_VIDEO_CODEC_NONE) {
349-
ESP_LOGI(TAG, "No media tracks to publish");
350-
return ENGINE_ERR_NONE;
352+
if (eng->options.media.audio_info.codec != ESP_PEER_AUDIO_CODEC_NONE &&
353+
send_add_audio_track(eng) != ENGINE_ERR_NONE) {
354+
return ENGINE_ERR_SIGNALING;
351355
}
352-
353-
int ret = ENGINE_ERR_OTHER;
354-
do {
355-
if (media_stream_begin(eng) != ENGINE_ERR_NONE) {
356-
ret = ENGINE_ERR_MEDIA;
357-
break;
358-
}
359-
if (eng->options.media.audio_info.codec != ESP_PEER_AUDIO_CODEC_NONE &&
360-
send_add_audio_track(eng) != ENGINE_ERR_NONE) {
361-
ret = ENGINE_ERR_SIGNALING;
362-
break;
363-
}
364-
if (eng->options.media.video_info.codec != ESP_PEER_VIDEO_CODEC_NONE &&
365-
send_add_video_track(eng) != ENGINE_ERR_NONE) {
366-
ret = ENGINE_ERR_SIGNALING;
367-
break;
368-
}
369-
return ENGINE_ERR_NONE;
370-
} while (0);
371-
372-
media_stream_end(eng);
373-
return ret;
356+
if (eng->options.media.video_info.codec != ESP_PEER_VIDEO_CODEC_NONE &&
357+
send_add_video_track(eng) != ENGINE_ERR_NONE) {
358+
return ENGINE_ERR_SIGNALING;
359+
}
360+
return ENGINE_ERR_NONE;
374361
}
375362

376363
// MARK: - Signal event handlers
@@ -420,6 +407,7 @@ static void on_peer_sdp(const char *sdp, peer_role_t role, void *ctx)
420407
event_enqueue(eng, &ev, false);
421408
}
422409

410+
423411
static bool on_peer_data_packet(livekit_pb_data_packet_t* packet, void *ctx)
424412
{
425413
engine_t *eng = (engine_t *)ctx;
@@ -833,7 +821,9 @@ static bool handle_state_connecting(engine_t *eng, const engine_event_t *ev)
833821
break;
834822
case EV_SIG_STATE:
835823
signal_state_t sig_state = ev->detail.sig_state;
836-
if (sig_state == SIGNAL_STATE_DISCONNECTED) {
824+
if (sig_state == SIGNAL_STATE_CONNECTED) {
825+
send_add_track_requests(eng);
826+
} else if(sig_state == SIGNAL_STATE_DISCONNECTED) {
837827
eng->failure_reason = LIVEKIT_FAILURE_REASON_OTHER;
838828
eng->state = ENGINE_STATE_BACKOFF;
839829
} else if (sig_state & SIGNAL_STATE_FAILED_ANY) {
@@ -886,7 +876,7 @@ static bool handle_state_connected(engine_t *eng, const engine_event_t *ev)
886876
case _EV_STATE_ENTER:
887877
eng->retry_count = 0;
888878
eng->failure_reason = LIVEKIT_FAILURE_REASON_NONE;
889-
publish_tracks(eng);
879+
media_stream_begin(eng);
890880
break;
891881
case EV_CMD_CLOSE:
892882
signal_send_leave(eng->signal_handle);
@@ -1066,6 +1056,45 @@ static void engine_task(void *arg)
10661056
vTaskDelete(NULL);
10671057
}
10681058

1059+
/// Sets up and enables the capture sink used for publishing media.
///
/// Builds a sink configuration from the engine's media options, sets up the
/// sink on path index 0 of `eng->options.media.capturer` (passing
/// `&eng->capturer_path` to receive the path), then enables that path with
/// `ESP_CAPTURE_RUN_MODE_ALWAYS`.
///
/// Returns `ENGINE_ERR_NONE` on success, or `ENGINE_ERR_MEDIA` (after logging
/// an error) if either the setup or the enable call does not return
/// `ESP_CAPTURE_ERR_OK`.
///
static engine_err_t enable_capture_sink(engine_t *eng)
1060+
{
1061+
esp_capture_sink_cfg_t sink_cfg = {
1062+
.audio_info = {
1063+
.format_id = capture_audio_codec_type(eng->options.media.audio_info.codec),
1064+
.sample_rate = eng->options.media.audio_info.sample_rate,
1065+
.channel = eng->options.media.audio_info.channel,
1066+
// Sample width is fixed at 16 bits rather than taken from the options.
.bits_per_sample = 16,
1067+
},
1068+
.video_info = {
1069+
// NOTE(review): the video format is hard-coded to H.264 and does not
// consult eng->options.media.video_info.codec — confirm this is
// intended when no video codec is configured.
.format_id = ESP_CAPTURE_FMT_ID_H264,
1070+
// Width, height and fps are narrowed by explicit cast.
.width = (uint16_t)eng->options.media.video_info.width,
1071+
.height = (uint16_t)eng->options.media.video_info.height,
1072+
.fps = (uint8_t)eng->options.media.video_info.fps,
1073+
},
1074+
};
1075+
1076+
if (esp_capture_sink_setup(
1077+
eng->options.media.capturer,
1078+
0, // Path index
1079+
&sink_cfg,
1080+
&eng->capturer_path
1081+
) != ESP_CAPTURE_ERR_OK) {
1082+
ESP_LOGE(TAG, "Capture sink setup failed");
1083+
return ENGINE_ERR_MEDIA;
1084+
}
1085+
1086+
// TODO: Add muxer
1087+
1088+
if (esp_capture_sink_enable(
1089+
eng->capturer_path,
1090+
ESP_CAPTURE_RUN_MODE_ALWAYS
1091+
) != ESP_CAPTURE_ERR_OK) {
1092+
ESP_LOGE(TAG, "Capture sink enable failed");
1093+
return ENGINE_ERR_MEDIA;
1094+
}
1095+
return ENGINE_ERR_NONE;
1096+
}
1097+
10691098
// MARK: - Public API
10701099

10711100
engine_handle_t engine_init(const engine_options_t *options)
@@ -1117,38 +1146,9 @@ engine_handle_t engine_init(const engine_options_t *options)
11171146
if (eng->signal_handle == NULL) {
11181147
goto _init_failed;
11191148
}
1149+
eng->renderer_handle = options->media.renderer;
11201150

1121-
esp_capture_sink_cfg_t sink_cfg = {
1122-
.audio_info = {
1123-
.format_id = capture_audio_codec_type(eng->options.media.audio_info.codec),
1124-
.sample_rate = eng->options.media.audio_info.sample_rate,
1125-
.channel = eng->options.media.audio_info.channel,
1126-
.bits_per_sample = 16,
1127-
},
1128-
.video_info = {
1129-
.format_id = capture_video_codec_type(eng->options.media.video_info.codec),
1130-
.width = (uint16_t)eng->options.media.video_info.width,
1131-
.height = (uint16_t)eng->options.media.video_info.height,
1132-
.fps = (uint8_t)eng->options.media.video_info.fps,
1133-
},
1134-
};
1135-
if (options->media.audio_info.codec != ESP_PEER_AUDIO_CODEC_NONE) {
1136-
// TODO: Can we ensure the renderer is valid? If not, return error.
1137-
eng->renderer_handle = options->media.renderer;
1138-
}
1139-
1140-
if (esp_capture_sink_setup(
1141-
eng->options.media.capturer,
1142-
0, // Path index
1143-
&sink_cfg,
1144-
&eng->capturer_path
1145-
) != ESP_CAPTURE_ERR_OK) {
1146-
goto _init_failed;
1147-
}
1148-
if (esp_capture_sink_enable(
1149-
eng->capturer_path,
1150-
ESP_CAPTURE_RUN_MODE_ALWAYS
1151-
) != ESP_CAPTURE_ERR_OK) {
1151+
if (enable_capture_sink(eng) != ENGINE_ERR_NONE) {
11521152
goto _init_failed;
11531153
}
11541154
return eng;
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# IDF
2+
**/sdkconfig
3+
**/sdkconfig.old
4+
**/build
5+
**/managed_components
6+
**/dependencies.lock
7+
**/dist
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# The following lines of boilerplate have to be in your project's CMakeLists
2+
# in this exact order for cmake to work correctly
3+
cmake_minimum_required(VERSION 3.5)
4+
set(COMPONENTS main) # Trim build
5+
include($ENV{IDF_PATH}/tools/cmake/project.cmake)
6+
project(minimal_video)
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Minimal Video
2+
3+
Basic example of connecting to a LiveKit room with bidirectional audio and video publishing.
4+
5+
## Configuration
6+
7+
> [!TIP]
8+
> Options can either be set through *menuconfig* or added to *sdkconfig* as shown below.
9+
10+
### Credentials
11+
12+
**Option A**: Use a LiveKit Sandbox to get up and running quickly. Set up a LiveKit Sandbox from your [Cloud Project](https://cloud.livekit.io/projects/p_/sandbox), and use its ID in your configuration:
13+
14+
```ini
15+
CONFIG_LK_EXAMPLE_USE_SANDBOX=y
16+
CONFIG_LK_EXAMPLE_SANDBOX_ID="my-project-xxxxxx"
17+
```
18+
19+
**Option B**: Specify a server URL and pregenerated token:
20+
21+
```ini
22+
CONFIG_LK_EXAMPLE_USE_PREGENERATED=y
23+
CONFIG_LK_EXAMPLE_TOKEN="your-jwt-token"
24+
CONFIG_LK_EXAMPLE_SERVER_URL="ws://localhost:7880"
25+
```
26+
27+
### Network
28+
29+
Connect using WiFi as follows:
30+
31+
```ini
32+
CONFIG_LK_EXAMPLE_USE_WIFI=y
33+
CONFIG_LK_EXAMPLE_WIFI_SSID="<your SSID>"
34+
CONFIG_LK_EXAMPLE_WIFI_PASSWORD="<your password>"
35+
```
36+
37+
### Development Board
38+
39+
This example uses the Espressif [*codec_board*](https://components.espressif.com/components/tempotian/codec_board/) component to access board-specific peripherals for media capture and rendering. Supported boards are [defined here](https://github.com/espressif/esp-webrtc-solution/blob/65d13427dd83c37264b6cff966d60af0f84f649c/components/codec_board/board_cfg.txt). Locate the name of your board, and set it as follows:
40+
41+
```ini
42+
CONFIG_LK_EXAMPLE_CODEC_BOARD_TYPE="ESP32_P4_DEV_V14"
43+
```
44+
45+
## Build & Flash
46+
47+
Navigate to this directory in your terminal. Run the following command to build your application, flash it to your board, and monitor serial output:
48+
49+
```sh
50+
idf.py flash monitor
51+
```
52+
53+
Once running, the example will establish a network connection, connect to a LiveKit room, and print the following message:
54+
55+
```txt
56+
I (19508) livekit_example: Room state: Connected
57+
```
58+
59+
## Next Steps
60+
61+
With a room connection established, you can connect another client (another ESP32, [LiveKit Meet](https://meet.livekit.io), etc.) or dispatch an [agent](https://docs.livekit.io/agents/) to talk with.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
idf_component_register(SRCS "main.c" "example.c" "board.c" "media.c"
2+
INCLUDE_DIRS ".")
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
menu "LiveKit Example"
2+
3+
config LK_EXAMPLE_CODEC_BOARD_TYPE
4+
string "Codec board type"
5+
default "DUMMY_CODEC_BOARD"
6+
help
7+
The model of dev board you are using. See board_cfg.txt from the codec
8+
board component for a list of supported boards.
9+
10+
choice LK_EXAMPLE_CONNECTION_METHOD
11+
prompt "Choose room connection method"
12+
help
13+
Choose how to connect to the room in the example.
14+
15+
config LK_EXAMPLE_USE_SANDBOX
16+
bool "Sandbox token"
17+
help
18+
Use a sandbox token server for room authentication.
19+
20+
config LK_EXAMPLE_USE_PREGENERATED
21+
bool "Pre-generated token"
22+
help
23+
Use a pre-generated token and server URL for room connection.
24+
endchoice
25+
26+
config LK_EXAMPLE_SERVER_URL
27+
depends on LK_EXAMPLE_USE_PREGENERATED
28+
string "Server URL"
29+
default "ws://localhost:7880"
30+
help
31+
The server URL to use for room connection.
32+
config LK_EXAMPLE_TOKEN
33+
depends on LK_EXAMPLE_USE_PREGENERATED
34+
string "Token"
35+
help
36+
The token to use for room connection.
37+
38+
config LK_EXAMPLE_SANDBOX_ID
39+
depends on LK_EXAMPLE_USE_SANDBOX
40+
string "Sandbox ID"
41+
help
42+
The ID of the sandbox token server to use.
43+
config LK_EXAMPLE_ROOM_NAME
44+
depends on LK_EXAMPLE_USE_SANDBOX
45+
string "Room name (optional)"
46+
help
47+
Specific room name sandbox tokens will be generated with.
48+
config LK_EXAMPLE_PARTICIPANT_NAME
49+
depends on LK_EXAMPLE_USE_SANDBOX
50+
string "Participant name (optional)"
51+
help
52+
Specific participant name sandbox tokens will be generated with.
53+
54+
config LK_EXAMPLE_SPEAKER_VOLUME
55+
int "Default speaker volume (0-100%)"
56+
default 85
57+
range 0 100
58+
help
59+
Default playback volume for speaker output.
60+
endmenu
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#include "esp_log.h"
2+
#include "board.h"
3+
#include "codec_init.h"
4+
#include "codec_board.h"
5+
#include <math.h>
6+
7+
static const char *TAG = "board";
8+
9+
/// Initializes the development board for the example.
///
/// Logs a message, selects the codec board type from the
/// `CONFIG_LK_EXAMPLE_CODEC_BOARD_TYPE` configuration value, and initializes
/// the codec with `reuse_dev` disabled.
void board_init()
10+
{
11+
ESP_LOGI(TAG, "Initializing board");
12+
13+
set_codec_board_type(CONFIG_LK_EXAMPLE_CODEC_BOARD_TYPE);
14+
// Note: when playback and recording are used at the same time, reuse_dev must be set to false.
15+
codec_init_cfg_t cfg = {
16+
.reuse_dev = false
17+
};
18+
init_codec(&cfg);
19+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#pragma once
2+
3+
#ifdef __cplusplus
4+
extern "C" {
5+
#endif
6+
7+
/// Initialize board.
8+
void board_init(void);
9+
10+
#ifdef __cplusplus
11+
}
12+
#endif

0 commit comments

Comments
 (0)