Skip to content

Commit

Permalink
made processed-audio more robust to handle form data and raw binary
Browse files Browse the repository at this point in the history
  • Loading branch information
makeiteasierapps committed Feb 15, 2024
1 parent 7b9eb78 commit 0dc51f8
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 81 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,6 @@ _site/

# Ignore folders generated by Bundler
.bundle/
vendor/
vendor/

venv/
4 changes: 2 additions & 2 deletions docs/guides/use_python_recorder.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ Here's a brief overview of the script's parameters:
-u: --base-url(required): The URL to which the recordings are sent.
-t: --token(required): API token for server authentication.
-s: --seconds: Duration of recording segments in seconds (default: 30).
-m: --sensitivity: Microphone sensitivity threshold (0.0 to 100.0, default: 35.0). Set to 0 for continuous recording.
-m: --sensitivity: Microphone sensitivity threshold (0.0 to 100.0, default: 0). Set to 0 for continuous recording.
-l: --save: Save recordings locally.
-v: --verbose: Enable verbose output for debugging.
```
Expand All @@ -91,5 +91,5 @@ And that is it, you should now be able to record things locally, and test the fr
#### **Important Notes**

- Ensure your base_url and token are correct to successfully send recordings.
- Adjust the sensitivity to your microphone setup to avoid missing recordings or recording silence.
- Adjust the sensitivity to your microphone setup to avoid missing recordings or recording silence. Setting it too high can make the audio impossible to transcribe.
- Use the save option if you want to keep local copies of the recordings (file names "recording{timestamp}.wav").
178 changes: 100 additions & 78 deletions supabase/functions/process-audio/index.ts
Original file line number Diff line number Diff line change
@@ -1,97 +1,119 @@
import { serve } from "https://deno.land/[email protected]/http/server.ts";
import OpenAI, { toFile } from "https://deno.land/x/[email protected]/mod.ts";
import { serve } from 'https://deno.land/std/http/server.ts';
import { multiParser } from 'https://deno.land/x/[email protected]/mod.ts';
import OpenAI, { toFile } from 'https://deno.land/x/[email protected]/mod.ts';

import { corsHeaders } from "../common/cors.ts";
import { supabaseClient } from "../common/supabaseClient.ts";
import { corsHeaders } from '../common/cors.ts';
import { supabaseClient } from '../common/supabaseClient.ts';

// NOTE(review): this span is a rendered commit diff without +/- markers. Each
// group of removed lines (double-quoted strings) appears to be directly
// followed by the added lines that replace it (single-quoted strings), so most
// statements appear twice and the text is not one compilable function as shown.
/**
 * Request handler for the process-audio function.
 *
 * Flow on the added side of the diff:
 *  1. Reject anything other than POST with 405.
 *  2. Read the audio bytes, either from the file field of a
 *     multipart/form-data body or from the raw request body.
 *  3. Transcribe the bytes with the whisper-1 model.
 *  4. Return early with a No transcript found message when the transcript
 *     is treated as empty.
 *  5. Create a text-embedding-ada-002 embedding of the transcript and insert
 *     raw_text plus embeddings into the records table.
 *
 * @param req Incoming HTTP request.
 * @returns A Response: JSON with corsHeaders on the 200 and 500 paths, plain
 *          text on the 405 and 400 paths (and on the 415 path of the removed
 *          side).
 */
const processAudio = async (req: Request) => {

if (req.method !== "POST") {
return new Response("Method Not Allowed", { status: 405 });
}
if (req.method !== 'POST') {
return new Response('Method Not Allowed', { status: 405 });
}

const supabase = supabaseClient(req);
const openaiClient = new OpenAI({
apiKey: Deno.env.get("OPENAI_API_KEY"),
});
const supabase = supabaseClient(req);
const openaiClient = new OpenAI({
apiKey: Deno.env.get('OPENAI_API_KEY'),
});

// Validate Content-Type
// Removed side: only audio/wav and audio/x-wav were accepted, otherwise 415.
const contentType = req.headers.get("Content-Type") || "";
if (!contentType.includes("audio/wav") && !contentType.includes("audio/x-wav")) {
return new Response("Unsupported Media Type", { status: 415 });
}
// Added side: there is no Content-Type gate any more. The header only selects
// between the multipart branch and the raw-body branch below.
const contentType = req.headers.get('Content-Type') || '';
let arrayBuffer: ArrayBuffer;
// Default upload name; overwritten by the form filename in the multipart branch.
let filenameTimestamp = `audio_${Date.now()}.wav`;

const arrayBuffer = await req.arrayBuffer();
if (contentType.includes('multipart/form-data')) {
const form = await multiParser(req);
// The upload must arrive in a form field named file; otherwise respond 400.
if (!form || !form.files || !form.files.file) {
return new Response('File not found in form', {
status: 400,
headers: corsHeaders,
});
}
// NOTE(review): this logs the whole parsed form, which includes the uploaded
// file content read on the next lines.
console.log('Form:', form);
const file = form.files.file;
// NOTE(review): .buffer is the backing ArrayBuffer of file.content. If content
// is a typed-array view with a non-zero byteOffset or a shorter length, this
// passes more bytes than the file itself - TODO confirm against multiparser.
arrayBuffer = file.content.buffer;
filenameTimestamp = file.filename || filenameTimestamp;
} else {
// Any non-multipart request is read as raw binary audio.
arrayBuffer = await req.arrayBuffer();
}

let transcript: string;
let embeddings: any;
try {
const filenameTimestamp = `adeus_wav_${Date.now()}.wav`;
const wavFile = await toFile(arrayBuffer, filenameTimestamp);
let transcript: string;
let embeddings: any;
try {
// NOTE(review): this const shadows the outer filenameTimestamp declared above,
// so the filename taken from the form upload is not used inside this try block.
const filenameTimestamp = `adeus_wav_${Date.now()}.wav`;
const wavFile = await toFile(arrayBuffer, filenameTimestamp);
// Debug output of the constructed file object.
console.log(typeof wavFile, wavFile);

// const { data, error } = await supabase.storage
// .from("test")
// .upload(filenameTimestamp, wavFile);
// const { data, error } = await supabase.storage
// .from("test")
// .upload(filenameTimestamp, wavFile);

// if (error) {
// console.error("Error uploading file:", error);
// }
// if (error) {
// console.error("Error uploading file:", error);
// }

// Removed side: wrapped wavFile in toFile a second time before sending it.
const transcriptResponse = await openaiClient.audio.transcriptions.create({
file: await toFile(wavFile, filenameTimestamp),
model: "whisper-1",
prompt:
'If this audio file does not contain any speech, please return "None"',
});
transcript = transcriptResponse.text;
let transcriptLowered = transcript.toLowerCase();
// ("thank" in transcriptLowered &&
// "watch" in transcriptLowered &&
// "video" in transcriptLowered)
if (
transcript == "None" ||
transcript == "" ||
transcript == null ||
(transcriptLowered.includes("thank") &&
transcriptLowered.includes("watch"))
) {
return new Response(JSON.stringify({ message: "No transcript found." }), {
headers: { ...corsHeaders, "Content-Type": "application/json" },
status: 200,
});
}
// Added side: wavFile is passed to the transcription call directly.
const transcriptResponse =
await openaiClient.audio.transcriptions.create({
file: wavFile,
model: 'whisper-1',
prompt: 'If this audio file does not contain any speech, please return "None"',
});
transcript = transcriptResponse.text;
let transcriptLowered = transcript.toLowerCase();
// ("thank" in transcriptLowered &&
// "watch" in transcriptLowered &&
// "video" in transcriptLowered)
// A transcript is treated as no speech when it equals None, is empty, or
// contains both thank and watch - presumably a phrase the model tends to emit
// for silent audio; confirm with the author.
// NOTE(review): the null comparison is evaluated after toLowerCase has already
// been called on transcript above, so a null transcript would throw before
// reaching this check.
if (
transcript == 'None' ||
transcript == '' ||
transcript == null ||
(transcriptLowered.includes('thank') &&
transcriptLowered.includes('watch'))
) {
return new Response(
JSON.stringify({ message: 'No transcript found.' }),
{
headers: {
...corsHeaders,
'Content-Type': 'application/json',
},
status: 200,
}
);
}

console.log("Transcript:", transcript);
console.log('Transcript:', transcript);

const embeddingsResponse = await openaiClient.embeddings.create({
model: "text-embedding-ada-002",
input: transcript.replace(/\n/g, " ").replace(/\s{2,}/g, " "),
});
embeddings = embeddingsResponse.data[0].embedding;
console.log("Embeddings:", embeddings);
// Newlines and runs of whitespace are collapsed to single spaces before the
// transcript is embedded.
const embeddingsResponse = await openaiClient.embeddings.create({
model: 'text-embedding-ada-002',
input: transcript.replace(/\n/g, ' ').replace(/\s{2,}/g, ' '),
});
embeddings = embeddingsResponse.data[0].embedding;
console.log('Embeddings:', embeddings);

const { data, error } = await supabase
.from("records")
.insert({ raw_text: transcript, embeddings: embeddings });
const { data, error } = await supabase
.from('records')
.insert({ raw_text: transcript, embeddings: embeddings });

if (error) {
console.error("Error inserting record:", error);
// NOTE(review): an insert error is only logged; execution continues to the
// 200 success response at the end of the handler.
if (error) {
console.error('Error inserting record:', error);
}
} catch (error) {
// NOTE(review): the catch binding is untyped; reading .message assumes the
// thrown value is an Error instance.
console.error('Transcription error:', error);
return new Response(JSON.stringify({ error: error.message }), {
headers: { ...corsHeaders, 'Content-Type': 'application/json' },
status: 500,
});
}
} catch (error) {
console.error("Transcription error:", error);
return new Response(JSON.stringify({ error: error.message }), {
headers: { ...corsHeaders, "Content-Type": "application/json" },
status: 500,
});
}

return new Response(
JSON.stringify({ message: "Audio transcribed successfully.", transcript }),
{
headers: { ...corsHeaders, "Content-Type": "application/json" },
status: 200,
}
);
// Success path: echo the transcript back to the caller as JSON.
return new Response(
JSON.stringify({
message: 'Audio transcribed successfully.',
transcript,
}),
{
headers: { ...corsHeaders, 'Content-Type': 'application/json' },
status: 200,
}
);
};

// Hand processAudio to serve (imported from the Deno std http server module)
// as the request handler.
serve(processAudio);

0 comments on commit 0dc51f8

Please sign in to comment.