diff --git a/.env.example b/.env.example
index afd8a7d..1f2f725 100644
--- a/.env.example
+++ b/.env.example
@@ -1 +1,2 @@
-VITE_OPENAI_KEY="sk-..."
\ No newline at end of file
+VITE_OPENAI_KEY="sk-..."
+VITE_GEMINI_KEY="..."
\ No newline at end of file
diff --git a/index.html b/index.html
index c7913fd..823f9b0 100644
--- a/index.html
+++ b/index.html
@@ -21,6 +21,13 @@
+    <select id="aiSelector">
+      <option value="openai">OpenAI</option>
+      <option value="gemini">Gemini</option>
+    </select>
diff --git a/package-lock.json b/package-lock.json
index 3c782ef..a38b177 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -7,6 +7,9 @@
     "": {
       "name": "repo",
       "version": "0.0.0",
+      "dependencies": {
+        "@google/generative-ai": "^0.1.1"
+      },
       "devDependencies": {
         "typescript": "^5.2.2",
         "vite": "^5.0.0"
@@ -364,6 +367,14 @@
         "node": ">=12"
       }
     },
+    "node_modules/@google/generative-ai": {
+      "version": "0.1.1",
+      "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.1.1.tgz",
+      "integrity": "sha512-cbzKa8mT9YkTrT4XUuENIuvlqiJjwDgcD2Ks4L99Az9dWLgdXn8xnETEAZLOpqzoGx+1PuATZqlUnVRAeLbMgA==",
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
     "node_modules/@rollup/rollup-android-arm-eabi": {
       "version": "4.7.0",
       "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.7.0.tgz",
diff --git a/package.json b/package.json
index d9f06d6..409e8af 100644
--- a/package.json
+++ b/package.json
@@ -11,5 +11,8 @@
   "devDependencies": {
     "typescript": "^5.2.2",
     "vite": "^5.0.0"
+  },
+  "dependencies": {
+    "@google/generative-ai": "^0.1.1"
   }
 }
diff --git a/src/gemini.ts b/src/gemini.ts
new file mode 100644
index 0000000..03ad1c1
--- /dev/null
+++ b/src/gemini.ts
@@ -0,0 +1,41 @@
+import { GoogleGenerativeAI } from "@google/generative-ai";
+
+const DEFAULT_DEV_API_KEY = import.meta.env.VITE_GEMINI_KEY;
+
+const GEMINI_SYSTEM_PROMPT = `the user is dictating with his or her camera on.
+they are showing you things visually and giving you text prompts.
+be very brief and concise.
+be extremely concise. this is very important for my career. do not ramble.
+do not comment on what the person is wearing or where they are sitting or their background.
+focus on their gestures and the question they ask you.
+do not mention that there are a sequence of pictures. focus only on the image or the images necessary to answer the question.
+don't comment if they are smiling. don't comment if they are frowning. just focus on what they're asking.
+
+----- USER PROMPT BELOW -----
+
+{{USER_PROMPT}}
+`;
+
+const genAI = new GoogleGenerativeAI(DEFAULT_DEV_API_KEY);
+
+export async function makeGeminiRequest(text: string, imageUrl: string) {
+  const model = genAI.getGenerativeModel({ model: "gemini-pro-vision" });
+
+  // split imageUrl of format "data:...;base64,"
+  // into 1) mime-type and 2) just the data
+  let [mimeType, data] = imageUrl.split(";base64,");
+  mimeType = mimeType.split(":")[1];
+
+  const result = await model.generateContent([
+    GEMINI_SYSTEM_PROMPT.replace("{{USER_PROMPT}}", text),
+    {
+      inlineData: {
+        mimeType,
+        data,
+      },
+    },
+  ]);
+  const response = await result.response;
+  const content = await response.text();
+  return content;
+}
diff --git a/src/main.ts b/src/main.ts
index 6e5967d..ec2d564 100644
--- a/src/main.ts
+++ b/src/main.ts
@@ -1,7 +1,8 @@
-import { makeRequest } from "./openai";
+import { makeOpenAIRequest } from "./openai";
 import { startDictation, stopDictation, restartDictation } from "./dictation";
 import { startCamera, stopCamera } from "./camera";
 import { scaleAndStackImagesAndGetBase64 } from "./imageStacker";
+import { makeGeminiRequest } from "./gemini";
 
 const IMAGE_STACK_SIZE = 3;
 
@@ -35,7 +36,16 @@ function dictationEventHandler(message?: string) {
   const base64 = scaleAndStackImagesAndGetBase64(imageStack);
   const textPrompt = unsentMessages.join(" ");
   unsentMessages = [];
-  makeRequest(textPrompt, base64).then((result) => {
+
+  let aiFunction = null;
+  aiFunction =
+    document.querySelector<HTMLSelectElement>("#aiSelector")!.value === "gemini"
+      ? makeGeminiRequest
+      : makeOpenAIRequest;
+
+  aiFunction(textPrompt, base64).then((result) => {
+    console.log("result", result);
+
     // the dictation is catching its own speech!!!!! stop dictation before speaking.
     stopDictation();
     let utterance = new SpeechSynthesisUtterance(result);
diff --git a/src/openai.ts b/src/openai.ts
index bb9c103..39903ee 100644
--- a/src/openai.ts
+++ b/src/openai.ts
@@ -10,7 +10,7 @@ do not mention that there are a sequence of pictures. focus only on the image or
 don't comment if they are smiling. don't comment if they are frowning. just focus on what they're asking.
 `;
 
-export async function makeRequest(
+export async function makeOpenAIRequest(
   text: string,
   imageUrl: string,
   apiKey = DEFAULT_DEV_API_KEY
diff --git a/vite.config.js b/vite.config.js
new file mode 100644
index 0000000..cbd8786
--- /dev/null
+++ b/vite.config.js
@@ -0,0 +1,8 @@
+import { defineConfig } from "vite";
+
+export default defineConfig({
+  // skip Vite's dev-server dependency pre-bundling for the SDK: https://stackoverflow.com/a/75953479
+  optimizeDeps: {
+    exclude: ["@google/generative-ai"],
+  },
+});