Skip to content

Commit ac1db65

Browse files
authored
brianyin/ajs-310-play-local-audio-file-utility (#788)
1 parent 7fc7808 commit ac1db65

File tree

11 files changed

+407
-3
lines changed

11 files changed

+407
-3
lines changed

.changeset/hot-taxis-judge.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@livekit/agents': patch
3+
---
4+
5+
Add a utility to play a local audio file to LiveKit

REUSE.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,9 @@ SPDX-License-Identifier = "CC-BY-NC-SA-4.0"
3636
path = ["**/.gitattributes", "**.wav", "**/__snapshots__/**"]
3737
SPDX-FileCopyrightText = "2024 LiveKit, Inc."
3838
SPDX-License-Identifier = "Apache-2.0"
39+
40+
# audio resources
41+
[[annotations]]
42+
path = ["agents/resources/*.ogg", "agents/resources/NOTICE"]
43+
SPDX-FileCopyrightText = "2024 LiveKit, Inc."
44+
SPDX-License-Identifier = "Apache-2.0"

agents/package.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,14 @@
3434
"api:update": "api-extractor run --local --typescript-compiler-folder ../node_modules/typescript --verbose"
3535
},
3636
"devDependencies": {
37-
"@livekit/rtc-node": "^0.13.12",
37+
"@ffmpeg-installer/ffmpeg": "^1.1.0",
38+
"@livekit/rtc-node": "^0.13.13",
3839
"@microsoft/api-extractor": "^7.35.0",
40+
"@types/fluent-ffmpeg": "^2.1.28",
3941
"@types/json-schema": "^7.0.15",
4042
"@types/node": "^22.5.5",
4143
"@types/ws": "^8.5.10",
44+
"fluent-ffmpeg": "^2.1.3",
4245
"tsup": "^8.4.0",
4346
"typescript": "^5.0.0"
4447
},
@@ -50,8 +53,8 @@
5053
"commander": "^12.0.0",
5154
"heap-js": "^2.6.0",
5255
"json-schema": "^0.4.0",
53-
"openai": "^4.91.1",
5456
"livekit-server-sdk": "^2.13.3",
57+
"openai": "^4.91.1",
5558
"pidusage": "^4.0.1",
5659
"pino": "^8.19.0",
5760
"pino-pretty": "^11.0.0",

agents/resources/NOTICE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
keyboard-typing.ogg by Anton -- https://freesound.org/s/137/ -- License: Attribution 4.0
2+
keyboard-typing2.ogg by Anton -- https://freesound.org/s/137/ -- License: Attribution 4.0
71.9 KB
Binary file not shown.
27.4 KB
Binary file not shown.
186 KB
Binary file not shown.

agents/src/audio.ts

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,27 @@
11
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
22
//
33
// SPDX-License-Identifier: Apache-2.0
4+
import ffmpegInstaller from '@ffmpeg-installer/ffmpeg';
45
import { AudioFrame } from '@livekit/rtc-node';
6+
import ffmpeg from 'fluent-ffmpeg';
7+
import type { ReadableStream } from 'node:stream/web';
58
import { log } from './log.js';
9+
import { createStreamChannel } from './stream/stream_channel.js';
610
import type { AudioBuffer } from './utils.js';
711

12+
ffmpeg.setFfmpegPath(ffmpegInstaller.path);
13+
14+
/** Options accepted by the file-decoding helpers in this module. All fields are optional. */
export interface AudioDecodeOptions {
15+
/** Output sample rate in Hz. `audioFramesFromFile` falls back to 48000 when omitted. */
sampleRate?: number;
16+
/** Number of output channels. `audioFramesFromFile` falls back to 1 when omitted. */
numChannels?: number;
17+
/**
18+
* Audio format hint (e.g., 'mp3', 'ogg', 'wav', 'opus')
19+
* If not provided, FFmpeg will auto-detect
20+
*/
21+
format?: string;
22+
/**
 * Cancellation signal. When it fires, `audioFramesFromFile` closes its output
 * stream and kills the FFmpeg process if it is still running.
 */
abortSignal?: AbortSignal;
23+
}
24+
825
export function calculateAudioDurationSeconds(frame: AudioBuffer) {
926
// TODO(AJS-102): use frame.durationMs once available in rtc-node
1027
return Array.isArray(frame)
@@ -72,3 +89,117 @@ export class AudioByteStream {
7289
return frames;
7390
}
7491
}
92+
93+
/**
 * Decode an audio file into AudioFrame instances.
 *
 * The file is decoded by an FFmpeg child process into signed 16-bit
 * little-endian PCM, which is re-chunked by {@link AudioByteStream} and pushed
 * into the returned stream. The stream is closed when decoding ends, when
 * FFmpeg reports an error, or when `options.abortSignal` fires.
 *
 * @param filePath - Path to the audio file
 * @param options - Decoding options (sample rate, channel count, input format
 * hint and abort signal)
 * @returns A ReadableStream that yields AudioFrame objects
 *
 * @example
 * ```typescript
 * for await (const frame of audioFramesFromFile('audio.ogg', { sampleRate: 48000 })) {
 *   console.log('Frame:', frame.samplesPerChannel, 'samples');
 * }
 * ```
 */
export function audioFramesFromFile(
  filePath: string,
  options: AudioDecodeOptions = {},
): ReadableStream<AudioFrame> {
  const sampleRate = options.sampleRate ?? 48000;
  const numChannels = options.numChannels ?? 1;
  const { abortSignal } = options;

  const audioStream = new AudioByteStream(sampleRate, numChannels);
  const channel = createStreamChannel<AudioFrame>();
  const logger = log();

  // An 'abort' listener never fires for a signal that is already aborted, so
  // handle that case up front instead of spawning FFmpeg for nothing.
  if (abortSignal?.aborted) {
    logger.debug('Audio file playback aborted');
    channel.close();
    return channel.stream();
  }

  // TODO (Brian): decode WAV using a custom decoder instead of FFmpeg
  const command = ffmpeg(filePath).inputOptions([
    '-probesize',
    '32',
    '-analyzeduration',
    '0',
    '-fflags',
    '+nobuffer+flush_packets',
    '-flags',
    'low_delay',
  ]);

  // Honor the documented format hint; without it FFmpeg auto-detects the container.
  if (options.format) {
    command.inputFormat(options.format);
  }

  command
    .format('s16le') // signed 16-bit little-endian PCM to be consistent cross-platform
    .audioChannels(numChannels)
    .audioFrequency(sampleRate);

  // `finished` makes shutdown idempotent: abort, end and error may all fire.
  let finished = false;
  // Set when we kill FFmpeg ourselves, so the resulting 'error' event is expected.
  let killedByAbort = false;

  /** Close the output channel exactly once and optionally kill FFmpeg. */
  function finish(killCommand: boolean): void {
    if (finished) {
      return;
    }
    finished = true;
    // Do not keep this decoder reachable from a long-lived signal.
    abortSignal?.removeEventListener('abort', onAbort);
    channel.close();
    if (killCommand) {
      command.kill('SIGKILL');
    }
  }

  function onAbort(): void {
    logger.debug('Audio file playback aborted');
    killedByAbort = true;
    finish(true);
  }

  // fluent-ffmpeg reports process failures (and kills) on the command itself;
  // an EventEmitter 'error' without a listener would be thrown as uncaught.
  command.on('error', (err: Error) => {
    if (killedByAbort) {
      return;
    }
    logger.error(err);
    finish(false);
  });

  const outputStream = command.pipe();
  abortSignal?.addEventListener('abort', onAbort, { once: true });

  outputStream.on('data', (chunk: Buffer) => {
    // Chunks may still arrive after an abort; the channel is already closed then.
    if (finished) {
      return;
    }

    // Copy out exactly this chunk's bytes: a Buffer may be a view into a larger pool.
    const arrayBuffer = chunk.buffer.slice(
      chunk.byteOffset,
      chunk.byteOffset + chunk.byteLength,
    ) as ArrayBuffer;

    for (const frame of audioStream.write(arrayBuffer)) {
      channel.write(frame);
    }
  });

  outputStream.on('end', () => {
    if (finished) {
      return;
    }
    // Emit whatever partial frame is still buffered before closing.
    for (const frame of audioStream.flush()) {
      channel.write(frame);
    }
    finish(false);
  });

  outputStream.on('error', (err: Error) => {
    logger.error(err);
    finish(false);
  });

  return channel.stream();
}
178+
179+
/**
 * Loop audio frames from a file indefinitely.
 *
 * The file is decoded once via `audioFramesFromFile`; every frame is yielded
 * as it is decoded and also cached in memory. After the first pass the cached
 * frames are replayed until `options.abortSignal` is aborted. Without an abort
 * signal the generator never finishes on its own.
 *
 * @param filePath - Path to the audio file
 * @param options - Decoding options
 * @returns AsyncGenerator that yields AudioFrame objects in an infinite loop
 */
export async function* loopAudioFramesFromFile(
  filePath: string,
  options: AudioDecodeOptions = {},
): AsyncGenerator<AudioFrame, void, unknown> {
  const frames: AudioFrame[] = [];
  const logger = log();

  for await (const frame of audioFramesFromFile(filePath, options)) {
    frames.push(frame);
    yield frame;
  }

  if (frames.length === 0) {
    // Replaying an empty cache would spin forever without ever yielding,
    // blocking the event loop; there is nothing to loop, so stop here.
    logger.warn('Audio file produced no frames, nothing to loop');
  } else {
    while (!options.abortSignal?.aborted) {
      for (const frame of frames) {
        // Re-check per frame so an abort does not have to wait for a full pass.
        if (options.abortSignal?.aborted) {
          break;
        }
        yield frame;
      }
    }
  }

  logger.debug('Audio file playback loop finished');
}

agents/src/stream/stream_channel.test.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,4 +126,41 @@ describe('StreamChannel', () => {
126126
const nextResult = await reader.read();
127127
expect(nextResult.done).toBe(true);
128128
});
129+
130+
it('should gracefully handle close while read is pending', async () => {
  const streamChannel = createStreamChannel<string>();

  // Issue the read before anything is written so it is still pending at close time.
  const pendingRead = streamChannel.stream().getReader().read();

  await streamChannel.close();

  // Closing must settle the pending read as "done" with no value.
  const { done, value } = await pendingRead;
  expect(done).toBe(true);
  expect(value).toBeUndefined();
});
142+
143+
it('should complete all pending reads when closed', async () => {
  const streamChannel = createStreamChannel<number>();
  const streamReader = streamChannel.stream().getReader();

  // Queue three reads up front; only two values are ever written.
  const firstRead = streamReader.read();
  const secondRead = streamReader.read();
  const thirdRead = streamReader.read();

  await streamChannel.write(42);
  await streamChannel.write(43);
  await streamChannel.close();

  const [first, second, third] = await Promise.all([firstRead, secondRead, thirdRead]);

  // The first two reads receive the written values in order.
  expect(first.done).toBe(false);
  expect(first.value).toBe(42);

  expect(second.done).toBe(false);
  expect(second.value).toBe(43);

  // The read that was still waiting is completed by the close.
  expect(third.done).toBe(true);
});
129166
});
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
import {
5+
type JobContext,
6+
WorkerOptions,
7+
cli,
8+
defineAgent,
9+
log,
10+
loopAudioFramesFromFile,
11+
} from '@livekit/agents';
12+
import { AudioSource, LocalAudioTrack, TrackPublishOptions, TrackSource } from '@livekit/rtc-node';
13+
import { dirname, join } from 'node:path';
14+
import { fileURLToPath } from 'node:url';
15+
16+
/**
 * Example agent: publishes a local audio track and plays a bundled .ogg file
 * into it on a loop until the job shuts down.
 */
export default defineAgent({
  entry: async (ctx: JobContext) => {
    const logger = log();
    // Shared by the audio source and the decoder so both agree on the PCM layout.
    const sampleRate = 48000;
    const numChannels = 1;

    await ctx.connect();

    logger.info('Playing audio file to LiveKit track...');

    const audioSource = new AudioSource(sampleRate, numChannels);

    const track = LocalAudioTrack.createAudioTrack('background_audio', audioSource);

    const localParticipant = ctx.room.localParticipant;
    if (!localParticipant) {
      throw new Error('Local participant is not available after connecting to the room');
    }

    const publication = await localParticipant.publishTrack(
      track,
      new TrackPublishOptions({
        source: TrackSource.SOURCE_MICROPHONE,
      }),
    );

    await publication.waitForSubscription();

    logger.info(`Audio track published: ${publication.sid}`);

    const currentDir = dirname(fileURLToPath(import.meta.url));
    const resourcesPath = join(currentDir, '../../agents/resources');
    const audioFile = join(resourcesPath, 'office-ambience.ogg');

    logger.info(`Playing: ${audioFile}`);

    const abortController = new AbortController();

    // Stop the playback loop when the job is shutting down.
    ctx.addShutdownCallback(async () => {
      abortController.abort();
    });

    let frameCount = 0;
    // Track real samples played rather than assuming a fixed frame duration:
    // the last frame of each pass may be shorter than the others.
    let playedSamples = 0;
    for await (const frame of loopAudioFramesFromFile(audioFile, {
      sampleRate,
      numChannels,
      abortSignal: abortController.signal,
    })) {
      await audioSource.captureFrame(frame);
      frameCount++;
      playedSamples += frame.samplesPerChannel;

      if (frameCount % 100 === 0) {
        logger.info(`Played ${frameCount} frames (${(playedSamples / sampleRate).toFixed(1)}s)`);
      }
    }
  },
});

cli.runApp(new WorkerOptions({ agent: fileURLToPath(import.meta.url) }));

0 commit comments

Comments
 (0)