forked from twilio-labs/call-gpt
-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.js
114 lines (94 loc) · 3.68 KB
/
app.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
require('dotenv').config();
require('colors');
const express = require('express');
const ExpressWs = require('express-ws');
const { GptService } = require('./services/gpt-service');
const { StreamService } = require('./services/stream-service');
const { TranscriptionService } = require('./services/transcription-service');
const { TextToSpeechService } = require('./services/tts-service');
const { recordingService } = require('./services/recording-service');
const VoiceResponse = require('twilio').twiml.VoiceResponse;
// Create the Express application that hosts the routes defined in this file.
const app = express();
// Apply express-ws to the app; the `/connection` route in this file is registered through `app.ws(...)`.
ExpressWs(app);
// Port to listen on: the PORT environment variable when set (and non-empty), otherwise 3000.
const PORT = process.env.PORT || 3000;
/**
 * POST /incoming
 *
 * Replies with TwiML containing a <Connect><Stream> verb whose URL points at
 * this server's `/connection` WebSocket route. The host name is read from the
 * SERVER environment variable.
 *
 * On failure the error is logged and a 500 is returned, so the HTTP request
 * never hangs without a response.
 */
app.post('/incoming', (req, res) => {
  try {
    const response = new VoiceResponse();
    const connect = response.connect();
    connect.stream({ url: `wss://${process.env.SERVER}/connection` });

    res.type('text/xml');
    res.end(response.toString());
  } catch (err) {
    console.log(err);
    // Only reply if nothing has been sent yet; otherwise the response is
    // already in flight and a second write would itself throw.
    if (!res.headersSent) {
      res.status(500).end();
    }
  }
});
/**
 * WebSocket route /connection
 *
 * Handles one media-stream socket. For each connection it creates its own
 * GPT, stream, transcription and text-to-speech service instances and wires
 * them together:
 *
 *   socket 'media'  -> transcriptionService.send(payload)
 *   'transcription' -> gptService.completion(text, interactionCount)
 *   'gptreply'      -> ttsService.generate(reply, icount)
 *   'speech'        -> streamService.buffer(responseIndex, audio)
 *   'audiosent'     -> label remembered in `marks` until a 'mark' message
 *                      with the same name arrives on the socket
 *
 * When an utterance longer than 5 characters arrives while `marks` is
 * non-empty, a `clear` event is sent on the socket (treated as an
 * interruption by the caller).
 *
 * @param {object} ws - The WebSocket for this connection (from express-ws).
 */
app.ws('/connection', (ws) => {
  try {
    ws.on('error', console.error);

    // Filled in from start message
    let streamSid;
    let callSid;

    const gptService = new GptService();
    const streamService = new StreamService(ws);
    const transcriptionService = new TranscriptionService();
    const ttsService = new TextToSpeechService({});

    // Labels announced via 'audiosent' that have not yet been echoed back in
    // a 'mark' message.
    let marks = [];
    let interactionCount = 0;

    // Incoming from MediaStream
    ws.on('message', function message(data) {
      // This callback runs long after the outer try/catch has exited, so it
      // needs its own guard: a malformed frame (invalid JSON, missing fields)
      // would otherwise throw out of the event listener as an uncaught
      // exception.
      try {
        const msg = JSON.parse(data);

        if (msg.event === 'start') {
          streamSid = msg.start.streamSid;
          callSid = msg.start.callSid;
          streamService.setStreamSid(streamSid);
          gptService.setCallSid(callSid);

          // Set RECORDING_ENABLED='true' in .env to record calls
          recordingService(ttsService, callSid)
            .then(() => {
              console.log(`Twilio -> Starting Media Stream for ${streamSid}`.underline.red);
              ttsService.generate({partialResponseIndex: null, partialResponse: 'Hello! I understand you\'re looking for a pair of AirPods, is that correct?'}, 0);
            })
            .catch((err) => {
              // Never leave the promise floating: report the failure instead
              // of triggering an unhandled rejection.
              console.log(err);
            });
        } else if (msg.event === 'media') {
          transcriptionService.send(msg.media.payload);
        } else if (msg.event === 'mark') {
          const label = msg.mark.name;
          console.log(`Twilio -> Audio completed mark (${msg.sequenceNumber}): ${label}`.red);
          // This label has been acknowledged, so stop tracking it.
          marks = marks.filter((m) => m !== label);
        } else if (msg.event === 'stop') {
          console.log(`Twilio -> Media stream ${streamSid} ended.`.underline.red);
        }
      } catch (err) {
        console.log(err);
      }
    });

    transcriptionService.on('utterance', async (text) => {
      // This is a bit of a hack to filter out empty utterances: only react
      // when there are unacknowledged marks and the text is longer than
      // 5 characters.
      if (marks.length > 0 && text?.length > 5) {
        console.log('Twilio -> Interruption, Clearing stream'.red);
        ws.send(
          JSON.stringify({
            streamSid,
            event: 'clear',
          })
        );
      }
    });

    transcriptionService.on('transcription', async (text) => {
      if (!text) { return; }
      console.log(`Interaction ${interactionCount} – STT -> GPT: ${text}`.yellow);
      gptService.completion(text, interactionCount);
      interactionCount += 1;
    });

    gptService.on('gptreply', async (gptReply, icount) => {
      console.log(`Interaction ${icount}: GPT -> TTS: ${gptReply.partialResponse}`.green );
      ttsService.generate(gptReply, icount);
    });

    ttsService.on('speech', (responseIndex, audio, label, icount) => {
      console.log(`Interaction ${icount}: TTS -> TWILIO: ${label}`.blue);
      streamService.buffer(responseIndex, audio);
    });

    streamService.on('audiosent', (markLabel) => {
      marks.push(markLabel);
    });
  } catch (err) {
    console.log(err);
  }
});
// Start the server. The message is logged from the listen callback so it is
// printed only once the port has actually been bound, rather than
// unconditionally (e.g. when the port is already in use).
app.listen(PORT, () => {
  console.log(`Server running on port ${PORT}`);
});