Made process-audio more robust to handle form data and raw binary

adamcohenhillel · Feb 15, 2024 · 0dc51f8 · 0dc51f8
1 parent 7b9eb78
commit 0dc51f8
Show file tree

Hide file tree

Showing 3 changed files with 105 additions and 81 deletions.
diff --git a/.gitignore b/.gitignore
@@ -66,4 +66,6 @@ _site/
 
 # Ignore folders generated by Bundler
 .bundle/
-vendor/
+vendor/
+
+venv/
diff --git a/docs/guides/use_python_recorder.md b/docs/guides/use_python_recorder.md
@@ -69,7 +69,7 @@ Here's a brief overview of the script's parameters:
     -u: --base-url(required): The URL to which the recordings are sent.
     -t: --token(required): API token for server authentication.
     -s: --seconds: Duration of recording segments in seconds (default: 30).
-    -m: --sensitivity: Microphone sensitivity threshold (0.0 to 100.0, default: 35.0). Set to 0 for continuous recording.
+    -m: --sensitivity: Microphone sensitivity threshold (0.0 to 100.0, default: 0). Set to 0 for continuous recording.
     -l: --save: Save recordings locally.
     -v: --verbose: Enable verbose output for debugging.
 ```
@@ -91,5 +91,5 @@ And that is it, you should now be able to record things locally, and test the fr
 #### **Important Notes**
 
 - Ensure your base_url and token are correct to successfully send recordings.
-- Adjust the sensitivity to your microphone setup to avoid missing recordings or record silance.
+- Adjust the sensitivity to your microphone setup to avoid missing recordings or recording silence. Setting it too high can make the audio impossible to transcribe.
 - Use the save option if you want to keep local copies of the recordings (file names "recording{timestamp}.wav").
diff --git a/supabase/functions/process-audio/index.ts b/supabase/functions/process-audio/index.ts
@@ -1,97 +1,119 @@
-import { serve } from "https://deno.land/[email protected]/http/server.ts";
-import OpenAI, { toFile } from "https://deno.land/x/[email protected]/mod.ts";
+import { serve } from 'https://deno.land/std/http/server.ts';
+import { multiParser } from 'https://deno.land/x/[email protected]/mod.ts';
+import OpenAI, { toFile } from 'https://deno.land/x/[email protected]/mod.ts';
 
-import { corsHeaders } from "../common/cors.ts";
-import { supabaseClient } from "../common/supabaseClient.ts";
+import { corsHeaders } from '../common/cors.ts';
+import { supabaseClient } from '../common/supabaseClient.ts';
 
 const processAudio = async (req: Request) => {
-
-  if (req.method !== "POST") {
-    return new Response("Method Not Allowed", { status: 405 });
-  }
+    if (req.method !== 'POST') {
+        return new Response('Method Not Allowed', { status: 405 });
+    }
 
-  const supabase = supabaseClient(req);
-  const openaiClient = new OpenAI({
-    apiKey: Deno.env.get("OPENAI_API_KEY"),
-  });
+    const supabase = supabaseClient(req);
+    const openaiClient = new OpenAI({
+        apiKey: Deno.env.get('OPENAI_API_KEY'),
+    });
 
-  // Validate Content-Type
-  const contentType = req.headers.get("Content-Type") || "";
-  if (!contentType.includes("audio/wav") && !contentType.includes("audio/x-wav")) {
-    return new Response("Unsupported Media Type", { status: 415 });
-  }
+    const contentType = req.headers.get('Content-Type') || '';
+    let arrayBuffer: ArrayBuffer;
+    let filenameTimestamp = `audio_${Date.now()}.wav`;
 
-  const arrayBuffer = await req.arrayBuffer();
+    if (contentType.includes('multipart/form-data')) {
+        const form = await multiParser(req);
+        if (!form || !form.files || !form.files.file) {
+            return new Response('File not found in form', {
+                status: 400,
+                headers: corsHeaders,
+            });
+        }
+        console.log('Form:', form);
+        const file = form.files.file;
+        arrayBuffer = file.content.buffer;
+        filenameTimestamp = file.filename || filenameTimestamp;
+    } else {
+        arrayBuffer = await req.arrayBuffer();
+    }
 
-  let transcript: string;
-  let embeddings: any;
-  try {
-    const filenameTimestamp = `adeus_wav_${Date.now()}.wav`;
-    const wavFile = await toFile(arrayBuffer, filenameTimestamp);
+    let transcript: string;
+    let embeddings: any;
+    try {
+        const filenameTimestamp = `adeus_wav_${Date.now()}.wav`;
+        const wavFile = await toFile(arrayBuffer, filenameTimestamp);
+        console.log(typeof wavFile, wavFile);
 
-    // const { data, error } = await supabase.storage
-    //   .from("test")
-    //   .upload(filenameTimestamp, wavFile);
+        // const { data, error } = await supabase.storage
+        //   .from("test")
+        //   .upload(filenameTimestamp, wavFile);
 
-    // if (error) {
-    //   console.error("Error uploading file:", error);
-    // }
+        // if (error) {
+        //   console.error("Error uploading file:", error);
+        // }
 
-    const transcriptResponse = await openaiClient.audio.transcriptions.create({
-      file: await toFile(wavFile, filenameTimestamp),
-      model: "whisper-1",
-      prompt:
-        'If this audio file does not contain any speech, please return "None"',
-    });
-    transcript = transcriptResponse.text;
-    let transcriptLowered = transcript.toLowerCase();
-    // ("thank" in transcriptLowered &&
-    //     "watch" in transcriptLowered &&
-    //     "video" in transcriptLowered)
-    if (
-      transcript == "None" ||
-      transcript == "" ||
-      transcript == null ||
-      (transcriptLowered.includes("thank") &&
-        transcriptLowered.includes("watch"))
-    ) {
-      return new Response(JSON.stringify({ message: "No transcript found." }), {
-        headers: { ...corsHeaders, "Content-Type": "application/json" },
-        status: 200,
-      });
-    }
+        const transcriptResponse =
+            await openaiClient.audio.transcriptions.create({
+                file: wavFile,
+                model: 'whisper-1',
+                prompt: 'If this audio file does not contain any speech, please return "None"',
+            });
+        transcript = transcriptResponse.text;
+        let transcriptLowered = transcript.toLowerCase();
+        // ("thank" in transcriptLowered &&
+        //     "watch" in transcriptLowered &&
+        //     "video" in transcriptLowered)
+        if (
+            transcript == 'None' ||
+            transcript == '' ||
+            transcript == null ||
+            (transcriptLowered.includes('thank') &&
+                transcriptLowered.includes('watch'))
+        ) {
+            return new Response(
+                JSON.stringify({ message: 'No transcript found.' }),
+                {
+                    headers: {
+                        ...corsHeaders,
+                        'Content-Type': 'application/json',
+                    },
+                    status: 200,
+                }
+            );
+        }
 
-    console.log("Transcript:", transcript);
+        console.log('Transcript:', transcript);
 
-    const embeddingsResponse = await openaiClient.embeddings.create({
-      model: "text-embedding-ada-002",
-      input: transcript.replace(/\n/g, " ").replace(/\s{2,}/g, " "),
-    });
-    embeddings = embeddingsResponse.data[0].embedding;
-    console.log("Embeddings:", embeddings);
+        const embeddingsResponse = await openaiClient.embeddings.create({
+            model: 'text-embedding-ada-002',
+            input: transcript.replace(/\n/g, ' ').replace(/\s{2,}/g, ' '),
+        });
+        embeddings = embeddingsResponse.data[0].embedding;
+        console.log('Embeddings:', embeddings);
 
-    const { data, error } = await supabase
-      .from("records")
-      .insert({ raw_text: transcript, embeddings: embeddings });
+        const { data, error } = await supabase
+            .from('records')
+            .insert({ raw_text: transcript, embeddings: embeddings });
 
-    if (error) {
-      console.error("Error inserting record:", error);
+        if (error) {
+            console.error('Error inserting record:', error);
+        }
+    } catch (error) {
+        console.error('Transcription error:', error);
+        return new Response(JSON.stringify({ error: error.message }), {
+            headers: { ...corsHeaders, 'Content-Type': 'application/json' },
+            status: 500,
+        });
     }
-  } catch (error) {
-    console.error("Transcription error:", error);
-    return new Response(JSON.stringify({ error: error.message }), {
-      headers: { ...corsHeaders, "Content-Type": "application/json" },
-      status: 500,
-    });
-  }
 
-  return new Response(
-    JSON.stringify({ message: "Audio transcribed successfully.", transcript }),
-    {
-      headers: { ...corsHeaders, "Content-Type": "application/json" },
-      status: 200,
-    }
-  );
+    return new Response(
+        JSON.stringify({
+            message: 'Audio transcribed successfully.',
+            transcript,
+        }),
+        {
+            headers: { ...corsHeaders, 'Content-Type': 'application/json' },
+            status: 200,
+        }
+    );
 };
 
 serve(processAudio);
-Original file line number
+Diff line change
@@ Expand Up / @@ -66,4 +66,6 @@ _site/ @@
     # Ignore folders generated by Bundler
     .bundle/
-    vendor/
+    vendor/
+    venv/