Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ node_modules/
jspm_packages/
dist-electron/
dist/
release/

# Snowpack dependency directory (https://snowpack.dev/)
web_modules/
Expand Down
16 changes: 10 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ If you’re looking for a hosted desktop recording API, consider checking out [R
- Git installed on your computer
- **Either** a Gemini API key (get it from [Google AI Studio](https://makersuite.google.com/app/apikey))
- **Or** Ollama installed locally for private LLM usage (recommended for privacy)
- *(Optional)* A free **Deepgram API key** for Live Voice Transcription & Translation

### Installation Steps

Expand Down Expand Up @@ -114,7 +115,8 @@ The built app will be in the `release` folder.
3. **Keyboard Shortcuts**:
- `Cmd/Ctrl + B`: Toggle window visibility
- `Cmd/Ctrl + H`: Take screenshot
- `Cmd/Enter`: Get solution
- `Cmd/Ctrl + Shift + R`: Toggle Live Voice Recording & Transcription
- `Cmd/Enter`: Get solution
- `Cmd/Ctrl + Arrow Keys`: Move window

## 🔧 Troubleshooting
Expand Down Expand Up @@ -169,10 +171,12 @@ If you see other errors:
- AI analyzes images, documents, presentations, or problems
- Get instant explanations, answers, and solutions

### **Audio Intelligence**
- Process audio files and recordings
- Real-time transcription and analysis
- Perfect for meeting notes and content review
### **Audio Intelligence & Live Transcription**
- Trigger live voice recording instantly with `Cmd/Ctrl + Shift + R`
- Captures both **system audio** (meetings) and **microphone** simultaneously
- **Real-time Transcription & Translation** powered by Deepgram `nova-3`
- Instantly translates foreign languages to English (or other targets) live on screen
- Submits exact transcripts directly to Gemini for instantaneous insights

### **Contextual Chat**
- Chat with AI about anything you see on screen
Expand Down Expand Up @@ -230,7 +234,7 @@ If you see other errors:
## Technical Details

### **AI Models Supported**
- **Gemini 2.0 Flash** - Latest Google AI with vision capabilities
- **Gemini 2.5 Flash** / **Gemini 3 Flash** / **Gemini 2.5 Flash Lite** - High-speed, high-limit daily requests
- **Llama 3.2** - Meta's advanced local model via Ollama
- **CodeLlama** - Specialized coding assistance
- **Mistral** - Lightweight, fast responses
Expand Down
21 changes: 21 additions & 0 deletions electron-launcher.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Launcher that strips ELECTRON_RUN_AS_NODE before starting Electron.
// If ELECTRON_RUN_AS_NODE=1 leaks in from the system environment,
// Electron boots as plain Node.js instead of the browser process.
delete process.env.ELECTRON_RUN_AS_NODE;

const { spawn } = require('child_process');
const electronBinary = require('electron');

// Hand the child a copy of the environment with the flag removed as well.
const childEnv = { ...process.env };
delete childEnv.ELECTRON_RUN_AS_NODE;

const electronProcess = spawn(electronBinary, ['.'], {
  stdio: 'inherit',
  env: childEnv
});

// Mirror the child's exit code; a null code (killed by signal) maps to 0.
electronProcess.on('close', (exitCode) => process.exit(exitCode || 0));
electronProcess.on('error', (spawnError) => {
  console.error('Failed to start electron:', spawnError);
  process.exit(1);
});
142 changes: 124 additions & 18 deletions electron/LLMHelper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,34 @@ export class LLMHelper {
// Provider toggle: true → local Ollama HTTP API, false → Google Gemini SDK.
private useOllama: boolean = false
// Active Ollama model tag; the constructor may override or auto-detect it.
private ollamaModel: string = "llama3.2"
// Base URL of the local Ollama server.
private ollamaUrl: string = "http://localhost:11434"
// Free-form context about the user, injected into meeting-assistant prompts.
private userInfo: string = ""
// Gemini model id used whenever a Gemini model instance is (re)created.
private geminiModelName: string = "gemini-2.5-flash"
// Stored Gemini API key so the model can be re-created on model switches.
private apiKey: string = ""

/**
 * Builds an LLM helper backed by either Google Gemini or a local Ollama server.
 *
 * Diff residue resolved: the pasted hunk contained both the old hard-coded
 * `gemini-2.0-flash` model creation and the new `this.geminiModelName`-based
 * one; only the configurable version is kept.
 *
 * @param apiKey Gemini API key (required unless `useOllama` is true).
 * @param useOllama When true, talk to a local Ollama server instead of Gemini.
 * @param ollamaModel Optional Ollama model tag; auto-detected when omitted.
 * @param ollamaUrl Optional Ollama base URL (defaults to localhost:11434).
 * @throws Error when neither an API key nor Ollama mode is provided.
 */
constructor(apiKey?: string, useOllama: boolean = false, ollamaModel?: string, ollamaUrl?: string) {
  this.useOllama = useOllama

  if (useOllama) {
    this.ollamaUrl = ollamaUrl || "http://localhost:11434"
    this.ollamaModel = ollamaModel || "gemma:latest" // Default fallback
    console.log(`[LLMHelper] Using Ollama with model: ${this.ollamaModel}`)

    // Auto-detect and use first available model if specified model doesn't exist
    this.initializeOllamaModel()
  } else if (apiKey) {
    // Keep the key so switchToGemini can re-create the model later.
    this.apiKey = apiKey
    const genAI = new GoogleGenerativeAI(apiKey)
    this.model = genAI.getGenerativeModel({ model: this.geminiModelName })
    console.log(`[LLMHelper] Using Google Gemini (${this.geminiModelName})`)
  } else {
    throw new Error("Either provide Gemini API key or enable Ollama mode")
  }
}

/** Stores free-form user context later appended to meeting-assistant prompts. */
public setUserInfo(info: string) {
this.userInfo = info;
}

private async fileToGenerativePart(imagePath: string) {
const imageData = await fs.promises.readFile(imagePath)
return {
Expand Down Expand Up @@ -124,7 +132,7 @@ export class LLMHelper {
public async extractProblemFromImages(imagePaths: string[]) {
try {
const imageParts = await Promise.all(imagePaths.map(path => this.fileToGenerativePart(path)))

const prompt = `${this.systemPrompt}\n\nYou are a wingman. Please analyze these images and extract the following information in JSON format:\n{
"problem_statement": "A clear statement of the problem or situation depicted in the images.",
"context": "Relevant background or context from the images.",
Expand Down Expand Up @@ -171,7 +179,7 @@ export class LLMHelper {
public async debugSolutionWithImages(problemInfo: any, currentCode: string, debugImagePaths: string[]) {
try {
const imageParts = await Promise.all(debugImagePaths.map(path => this.fileToGenerativePart(path)))

const prompt = `${this.systemPrompt}\n\nYou are a wingman. Given:\n1. The original problem or situation: ${JSON.stringify(problemInfo, null, 2)}\n2. The current response or approach: ${currentCode}\n3. The debug information in the provided images\n\nPlease analyze the debug information and provide feedback in this JSON format:\n{
"solution": {
"code": "The code or main answer here.",
Expand Down Expand Up @@ -203,7 +211,7 @@ export class LLMHelper {
mimeType: "audio/mp3"
}
};
const prompt = `${this.systemPrompt}\n\nDescribe this audio clip in a short, concise answer. In addition to your main answer, suggest several possible actions or responses the user could take next based on the audio. Do not return a structured JSON object, just answer naturally as you would to a user.`;
const prompt = `${this.systemPrompt}\n\nThe provided audio clip contains a question or a problem statement. Please act as a solver and provide the direct answer or solution to whatever is being asked in the audio. Do not return a structured JSON object, just answer naturally as you would to a user, and format your answer clearly with markdown.`;
const result = await this.model.generateContent([prompt, audioPart]);
const response = await result.response;
const text = response.text();
Expand All @@ -222,7 +230,16 @@ export class LLMHelper {
mimeType
}
};
const prompt = `${this.systemPrompt}\n\nDescribe this audio clip in a short, concise answer. In addition to your main answer, suggest several possible actions or responses the user could take next based on the audio. Do not return a structured JSON object, just answer naturally as you would to a user and be concise.`;

const userContextStr = this.userInfo ? `\n\nAbout the User:\n${this.userInfo}` : "";
const prompt = `You are an invisible, real-time meeting assistant. The user is currently in a live professional meeting. The following text is a direct transcription of a question the user just spoke into their microphone.${userContextStr}

Your Constraints:
- Answer the question directly and concisely.
- NEVER acknowledge that this text comes from an audio transcription.
- NEVER describe the speaker's voice, tone, or audio quality.
- Provide actionable answers that the user can immediately use in their meeting.`;

const result = await this.model.generateContent([prompt, audioPart]);
const response = await result.response;
const text = response.text();
Expand All @@ -233,6 +250,38 @@ export class LLMHelper {
}
}

/**
 * Streams a Gemini answer for a base64-encoded audio clip.
 * Each partial response is forwarded to onChunk as it arrives; the full
 * concatenated text (plus a timestamp) is returned once the stream completes.
 * Errors are logged and rethrown to the caller.
 */
public async analyzeAudioFromBase64Stream(data: string, mimeType: string, onChunk: (chunk: string) => void) {
  try {
    const audioPayload = {
      inlineData: {
        data,
        mimeType
      }
    };

    // Optional "About the User" section, only when user info was provided.
    const userContextStr = this.userInfo ? `\n\nAbout the User:\n${this.userInfo}` : "";
    const prompt = `You are an invisible, real-time meeting assistant. The user is currently in a live professional meeting. The following text is a direct transcription of a question the user just spoke into their microphone.${userContextStr}

Your Constraints:
- Answer the question directly and concisely.
- NEVER acknowledge that this text comes from an audio transcription.
- NEVER describe the speaker's voice, tone, or audio quality.
- Provide actionable answers that the user can immediately use in their meeting.`;

    const streaming = await this.model.generateContentStream([prompt, audioPayload]);
    let aggregated = "";
    for await (const part of streaming.stream) {
      const piece = part.text();
      aggregated += piece;
      onChunk(piece);
    }
    return { text: aggregated, timestamp: Date.now() };
  } catch (error) {
    console.error("Error analyzing audio from base64 stream:", error);
    throw error;
  }
}

public async analyzeImageFile(imagePath: string) {
try {
const imageData = await fs.promises.readFile(imagePath);
Expand All @@ -253,6 +302,31 @@ export class LLMHelper {
}
}

/**
 * Streams a short natural-language description of an image file on disk.
 * Partial text is forwarded through onChunk; returns the full text plus a
 * timestamp. Errors are logged and rethrown.
 */
public async analyzeImageFileStream(imagePath: string, onChunk: (chunk: string) => void) {
  try {
    const bytes = await fs.promises.readFile(imagePath);
    const imagePart = {
      inlineData: {
        data: bytes.toString("base64"),
        // NOTE(review): mime type is fixed — assumes callers only pass PNG
        // screenshots; confirm before feeding other formats through here.
        mimeType: "image/png"
      }
    };
    const prompt = `${this.systemPrompt}\n\nDescribe the content of this image in a short, concise answer. In addition to your main answer, suggest several possible actions or responses the user could take next based on the image. Do not return a structured JSON object, just answer naturally as you would to a user. Be concise and brief.`;

    const streaming = await this.model.generateContentStream([prompt, imagePart]);
    let combined = "";
    for await (const part of streaming.stream) {
      const piece = part.text();
      combined += piece;
      onChunk(piece);
    }
    return { text: combined, timestamp: Date.now() };
  } catch (error) {
    console.error("Error analyzing image file stream:", error);
    throw error;
  }
}

public async chatWithGemini(message: string): Promise<string> {
try {
if (this.useOllama) {
Expand All @@ -270,6 +344,30 @@ export class LLMHelper {
}
}

/**
 * Streams a chat reply from the active provider.
 * With Ollama the reply arrives as one chunk (no streaming API is used here);
 * with Gemini, chunks are forwarded through onChunk as they arrive.
 * @returns the complete reply text.
 * @throws Error when no provider is configured; other errors are logged and rethrown.
 */
public async chatWithGeminiStream(message: string, onChunk: (chunk: string) => void): Promise<string> {
  try {
    // Ollama path: single-shot call, delivered as one chunk.
    if (this.useOllama) {
      const reply = await this.callOllama(message);
      onChunk(reply);
      return reply;
    }
    if (!this.model) {
      throw new Error("No LLM provider configured");
    }
    // Gemini path: true token streaming.
    const streaming = await this.model.generateContentStream(message);
    let assembled = "";
    for await (const part of streaming.stream) {
      const piece = part.text();
      assembled += piece;
      onChunk(piece);
    }
    return assembled;
  } catch (error) {
    console.error("[LLMHelper] Error in chatWithGeminiStream:", error);
    throw error;
  }
}

/** Convenience alias: delegates to chatWithGemini (which routes to the active provider). */
public async chat(message: string): Promise<string> {
return this.chatWithGemini(message);
}
Expand All @@ -280,11 +378,11 @@ export class LLMHelper {

public async getOllamaModels(): Promise<string[]> {
if (!this.useOllama) return [];

try {
const response = await fetch(`${this.ollamaUrl}/api/tags`);
if (!response.ok) throw new Error('Failed to fetch models');

const data = await response.json();
return data.models?.map((model: any) => model.name) || [];
} catch (error) {
Expand All @@ -298,35 +396,43 @@ export class LLMHelper {
}

/**
 * Returns the model identifier for the active provider.
 * Diff residue resolved: the pasted hunk contained both the old hard-coded
 * "gemini-2.0-flash" return and the new field-based one; only the
 * configurable version is kept.
 */
public getCurrentModel(): string {
  return this.useOllama ? this.ollamaModel : this.geminiModelName
}

/**
 * Activates the Ollama provider.
 * Optionally overrides the server URL and model tag; when no model is given,
 * the first available model on the server is auto-detected.
 */
public async switchToOllama(model?: string, url?: string): Promise<void> {
  this.useOllama = true;
  if (url) {
    this.ollamaUrl = url;
  }

  if (model) {
    this.ollamaModel = model;
  } else {
    // No explicit model: ask the server which models exist and pick one.
    await this.initializeOllamaModel();
  }

  console.log(`[LLMHelper] Switched to Ollama: ${this.ollamaModel} at ${this.ollamaUrl}`);
}

public async switchToGemini(apiKey?: string): Promise<void> {
public async switchToGemini(apiKey?: string, modelName?: string): Promise<void> {
if (modelName) {
this.geminiModelName = modelName;
}
if (apiKey) {
this.apiKey = apiKey;
const genAI = new GoogleGenerativeAI(apiKey);
this.model = genAI.getGenerativeModel({ model: "gemini-2.0-flash" });
this.model = genAI.getGenerativeModel({ model: this.geminiModelName });
} else if (this.apiKey) {
// Re-create model with new model name using existing API key
const genAI = new GoogleGenerativeAI(this.apiKey);
this.model = genAI.getGenerativeModel({ model: this.geminiModelName });
}

if (!this.model && !apiKey) {
throw new Error("No Gemini API key provided and no existing model instance");
}

this.useOllama = false;
console.log("[LLMHelper] Switched to Gemini");
console.log(`[LLMHelper] Switched to Gemini (${this.geminiModelName})`);
}

public async testConnection(): Promise<{ success: boolean; error?: string }> {
Expand Down
8 changes: 6 additions & 2 deletions electron/ProcessingHelper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ export class ProcessingHelper {

constructor(appState: AppState) {
this.appState = appState

// Check if user wants to use Ollama
const useOllama = process.env.USE_OLLAMA === "true"
const ollamaModel = process.env.OLLAMA_MODEL // Don't set default here, let LLMHelper auto-detect
const ollamaUrl = process.env.OLLAMA_URL || "http://localhost:11434"

if (useOllama) {
console.log("[ProcessingHelper] Initializing with Ollama")
this.llmHelper = new LLMHelper(undefined, true, ollamaModel, ollamaUrl)
Expand Down Expand Up @@ -159,6 +159,10 @@ export class ProcessingHelper {
return this.llmHelper.analyzeAudioFromBase64(data, mimeType);
}

/** Streams analysis of a base64 audio clip; partial text is forwarded via onChunk. */
public async processAudioBase64Stream(data: string, mimeType: string, onChunk: (chunk: string) => void) {
return this.llmHelper.analyzeAudioFromBase64Stream(data, mimeType, onChunk);
}

// Add audio file processing method
public async processAudioFile(filePath: string) {
return this.llmHelper.analyzeAudioFile(filePath);
Expand Down
15 changes: 10 additions & 5 deletions electron/ScreenshotHelper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,18 @@ export class ScreenshotHelper {
private extraScreenshotQueue: string[] = []
private readonly MAX_SCREENSHOTS = 5

private readonly screenshotDir: string
private readonly extraScreenshotDir: string
// Lazily-resolved screenshot directories; empty until ensureDirs() runs.
// NOTE(review): resolution is deferred, presumably because app.getPath must
// not be called before the Electron app is ready — confirm against main.ts.
private screenshotDir: string = ""
private extraScreenshotDir: string = ""
// Guards ensureDirs() so path resolution and mkdir happen at most once.
private dirsInitialized: boolean = false

// Which capture queue new screenshots belong to.
private view: "queue" | "solutions" = "queue"

// Directories are intentionally NOT created here — see ensureDirs().
constructor(view: "queue" | "solutions" = "queue") {
this.view = view
}

// Initialize directories
private ensureDirs(): void {
if (this.dirsInitialized) return
this.screenshotDir = path.join(app.getPath("userData"), "screenshots")
this.extraScreenshotDir = path.join(
app.getPath("userData"),
Expand All @@ -33,6 +36,7 @@ export class ScreenshotHelper {
if (!fs.existsSync(this.extraScreenshotDir)) {
fs.mkdirSync(this.extraScreenshotDir)
}
this.dirsInitialized = true
}

public getView(): "queue" | "solutions" {
Expand Down Expand Up @@ -78,12 +82,13 @@ export class ScreenshotHelper {
hideMainWindow: () => void,
showMainWindow: () => void
): Promise<string> {
this.ensureDirs()
try {
hideMainWindow()

// Add a small delay to ensure window is hidden
await new Promise(resolve => setTimeout(resolve, 100))

let screenshotPath = ""

if (this.view === "queue") {
Expand Down
Loading