Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 152 additions & 41 deletions electron/LLMHelper.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
import { GoogleGenerativeAI, GenerativeModel } from "@google/generative-ai"
import fs from "fs"
import sharp from "sharp"

// Shape of a non-streaming reply from Ollama's /api/generate endpoint
// (requests are sent with stream: false, so a single JSON object comes back).
interface OllamaResponse {
// Full generated text for the prompt.
response: string
// True once the model has finished generating.
done: boolean
}

// Reply shape for image-bearing /api/generate calls. Structurally identical
// to OllamaResponse; kept separate so the vision call site is self-describing.
interface OllamaVisionResponse {
// Full generated text describing/answering about the attached images.
response: string
// True once the model has finished generating.
done: boolean
}

export class LLMHelper {
private model: GenerativeModel | null = null
private readonly systemPrompt = `You are Wingman AI, a helpful, proactive assistant for any kind of problem or situation (not just coding). For any user input, analyze the situation, provide a clear problem statement, relevant context, and suggest several possible responses or actions the user could take next. Always explain your reasoning. Present your suggestions as a list of options or next steps.`
Expand All @@ -15,12 +21,12 @@ export class LLMHelper {

constructor(apiKey?: string, useOllama: boolean = false, ollamaModel?: string, ollamaUrl?: string) {
this.useOllama = useOllama

if (useOllama) {
this.ollamaUrl = ollamaUrl || "http://localhost:11434"
this.ollamaModel = ollamaModel || "gemma:latest" // Default fallback
this.ollamaModel = ollamaModel // Default fallback
console.log(`[LLMHelper] Using Ollama with model: ${this.ollamaModel}`)

// Auto-detect and use first available model if specified model doesn't exist
this.initializeOllamaModel()
} else if (apiKey) {
Expand Down Expand Up @@ -50,6 +56,7 @@ export class LLMHelper {
return text;
}


private async callOllama(prompt: string): Promise<string> {
try {
const response = await fetch(`${this.ollamaUrl}/api/generate`, {
Expand Down Expand Up @@ -80,6 +87,51 @@ export class LLMHelper {
}
}

/**
 * Heuristic check for whether the configured Ollama model can accept images.
 * Matches the model name against known vision-model substrings; this is a
 * best-effort allowlist, not a capability query against the Ollama server.
 */
private supportsOllamaVision(): boolean {
  // Guard: the constructor no longer falls back to a default model name, so
  // ollamaModel may be undefined until initializeOllamaModel() resolves one.
  // Without this check .toLowerCase() would throw a TypeError.
  if (!this.ollamaModel) {
    return false
  }
  const model = this.ollamaModel.toLowerCase()
  // NOTE(review): hardcoded hint list — new vision model families will need
  // entries here; querying the server for capabilities would be more robust.
  const visionHints = ["vision", "llava", "moondream", "pixtral", "minicpm", "gpt4o", "flux", "reka"]
  return visionHints.some((hint) => model.includes(hint))
}

/**
 * Sends a multimodal generate request to Ollama with base64-encoded images
 * attached, returning the model's text reply.
 * Throws early when the configured model has no vision support, and rethrows
 * (after logging) any transport or API-level failure.
 */
private async callOllamaVision(prompt: string, images: string[]): Promise<string> {
  if (!this.supportsOllamaVision()) {
    throw new Error(`Current Ollama model (${this.ollamaModel}) does not support image understanding. Switch to a vision-capable model such as llama3.2-vision, llava, gemma2:vision, etc.`)
  }

  try {
    // Non-streaming request so the reply arrives as a single JSON object.
    const payload = {
      model: this.ollamaModel,
      prompt,
      images,
      stream: false,
      options: {
        temperature: 0.4,
        top_p: 0.9
      }
    }

    const res = await fetch(`${this.ollamaUrl}/api/generate`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload)
    })

    if (!res.ok) {
      throw new Error(`Ollama vision API error: ${res.status} ${res.statusText}`)
    }

    const data: OllamaVisionResponse = await res.json()
    if (!data?.response) {
      throw new Error("Ollama vision API returned an empty response")
    }

    return data.response
  } catch (error) {
    console.error("[LLMHelper] Error calling Ollama vision endpoint:", error)
    throw error
  }
}

private async checkOllamaAvailable(): Promise<boolean> {
try {
const response = await fetch(`${this.ollamaUrl}/api/tags`)
Expand Down Expand Up @@ -123,15 +175,26 @@ export class LLMHelper {

public async extractProblemFromImages(imagePaths: string[]) {
try {
const imageParts = await Promise.all(imagePaths.map(path => this.fileToGenerativePart(path)))

const prompt = `${this.systemPrompt}\n\nYou are a wingman. Please analyze these images and extract the following information in JSON format:\n{
"problem_statement": "A clear statement of the problem or situation depicted in the images.",
"context": "Relevant background or context from the images.",
"suggested_responses": ["First possible answer or action", "Second possible answer or action", "..."],
"reasoning": "Explanation of why these suggestions are appropriate."
}\nImportant: Return ONLY the JSON object, without any markdown formatting or code blocks.`

if (this.useOllama) {
const imagePayloads = await Promise.all(
imagePaths.map(async (imagePath) => (await fs.promises.readFile(imagePath)).toString("base64"))
)
const text = await this.callOllamaVision(prompt, imagePayloads)
return JSON.parse(this.cleanJsonResponse(text))
}

if (!this.model) {
throw new Error("Gemini model is not initialized. Provide a Gemini API key or enable Ollama mode.")
}

const imageParts = await Promise.all(imagePaths.map(path => this.fileToGenerativePart(path)))
const result = await this.model.generateContent([prompt, ...imageParts])
const response = await result.response
const text = this.cleanJsonResponse(response.text())
Expand All @@ -153,13 +216,21 @@ export class LLMHelper {
}
}\nImportant: Return ONLY the JSON object, without any markdown formatting or code blocks.`

console.log("[LLMHelper] Calling Gemini LLM for solution...");
console.log("[LLMHelper] Calling LLM for solution...");
try {
const result = await this.model.generateContent(prompt)
console.log("[LLMHelper] Gemini LLM returned result.");
const response = await result.response
const text = this.cleanJsonResponse(response.text())
const parsed = JSON.parse(text)
const text = this.useOllama
? await this.callOllama(prompt)
: await (async () => {
if (!this.model) {
throw new Error("Gemini model is not initialized. Provide a Gemini API key or enable Ollama mode.")
}
const result = await this.model.generateContent(prompt)
const response = await result.response
return response.text()
})()

const cleaned = this.cleanJsonResponse(text)
const parsed = JSON.parse(cleaned)
console.log("[LLMHelper] Parsed LLM response:", parsed)
return parsed
} catch (error) {
Expand All @@ -170,8 +241,6 @@ export class LLMHelper {

public async debugSolutionWithImages(problemInfo: any, currentCode: string, debugImagePaths: string[]) {
try {
const imageParts = await Promise.all(debugImagePaths.map(path => this.fileToGenerativePart(path)))

const prompt = `${this.systemPrompt}\n\nYou are a wingman. Given:\n1. The original problem or situation: ${JSON.stringify(problemInfo, null, 2)}\n2. The current response or approach: ${currentCode}\n3. The debug information in the provided images\n\nPlease analyze the debug information and provide feedback in this JSON format:\n{
"solution": {
"code": "The code or main answer here.",
Expand All @@ -182,6 +251,21 @@ export class LLMHelper {
}
}\nImportant: Return ONLY the JSON object, without any markdown formatting or code blocks.`

if (this.useOllama) {
const imagePayloads = await Promise.all(
debugImagePaths.map(async (imagePath) => (await fs.promises.readFile(imagePath)).toString("base64"))
)
const text = await this.callOllamaVision(prompt, imagePayloads)
const parsed = JSON.parse(this.cleanJsonResponse(text))
console.log("[LLMHelper] Parsed debug LLM response:", parsed)
return parsed
}

if (!this.model) {
throw new Error("Gemini model is not initialized. Provide a Gemini API key or enable Ollama mode.")
}

const imageParts = await Promise.all(debugImagePaths.map(path => this.fileToGenerativePart(path)))
const result = await this.model.generateContent([prompt, ...imageParts])
const response = await result.response
const text = this.cleanJsonResponse(response.text())
Expand All @@ -195,61 +279,88 @@ export class LLMHelper {
}

/**
 * Describes an audio file on disk via Gemini and suggests follow-up actions.
 * Only supported in Gemini mode — Ollama's generate API has no audio input.
 * @param audioPath path to the audio file to analyze
 * @returns the model's natural-language answer plus a capture timestamp
 * @throws when Ollama mode is active or the Gemini model is not initialized
 */
public async analyzeAudioFile(audioPath: string) {
  if (this.useOllama) {
    throw new Error("Audio analysis is currently only supported when using Gemini. Switch off USE_OLLAMA or supply a Gemini API key.")
  }

  try {
    if (!this.model) {
      throw new Error("Gemini model is not initialized. Provide a Gemini API key or disable Ollama mode.")
    }

    const audioData = await fs.promises.readFile(audioPath)
    // Derive the MIME type from the file extension instead of always claiming
    // audio/mp3; unknown extensions keep the previous mp3 default, so existing
    // callers see no behavior change.
    const ext = audioPath.split(".").pop()?.toLowerCase()
    const mimeByExt: Record<string, string> = {
      wav: "audio/wav",
      ogg: "audio/ogg",
      m4a: "audio/mp4",
      aac: "audio/aac",
      flac: "audio/flac",
      mp3: "audio/mp3"
    }
    const audioPart = {
      inlineData: {
        data: audioData.toString("base64"),
        mimeType: (ext && mimeByExt[ext]) || "audio/mp3"
      }
    }
    const prompt = `${this.systemPrompt}\n\nDescribe this audio clip in a short, concise answer. In addition to your main answer, suggest several possible actions or responses the user could take next based on the audio. Do not return a structured JSON object, just answer naturally as you would to a user.`
    const result = await this.model.generateContent([prompt, audioPart])
    const response = await result.response
    const text = response.text()
    return { text, timestamp: Date.now() }
  } catch (error) {
    console.error("Error analyzing audio file:", error)
    throw error
  }
}

/**
 * Describes an audio clip supplied as base64 data (no filesystem read) via
 * Gemini, with the caller providing the MIME type directly.
 * @param data base64-encoded audio bytes
 * @param mimeType MIME type of the encoded audio
 * @returns the model's natural-language answer plus a capture timestamp
 */
public async analyzeAudioFromBase64(data: string, mimeType: string) {
  if (this.useOllama) {
    throw new Error("Audio analysis is currently only supported when using Gemini. Switch off USE_OLLAMA or supply a Gemini API key.")
  }

  try {
    if (!this.model) {
      throw new Error("Gemini model is not initialized. Provide a Gemini API key or disable Ollama mode.")
    }

    const prompt = `${this.systemPrompt}\n\nDescribe this audio clip in a short, concise answer. In addition to your main answer, suggest several possible actions or responses the user could take next based on the audio. Do not return a structured JSON object, just answer naturally as you would to a user and be concise.`
    const audioPart = {
      inlineData: { data, mimeType }
    }

    const generation = await this.model.generateContent([prompt, audioPart])
    const reply = await generation.response
    return { text: reply.text(), timestamp: Date.now() }
  } catch (error) {
    console.error("Error analyzing audio from base64:", error)
    throw error
  }
}

/**
 * Describes an image file and suggests follow-up actions. Works in both
 * Ollama mode (vision-capable models only, via callOllamaVision) and
 * Gemini mode.
 * @param imagePath path to the image file to analyze
 * @returns the model's natural-language answer plus a capture timestamp
 */
public async analyzeImageFile(imagePath: string) {
  try {
    const base64Image = (await fs.promises.readFile(imagePath)).toString("base64")
    const prompt = `${this.systemPrompt}\n\nDescribe the content of this image in a short, concise answer. In addition to your main answer, suggest several possible actions or responses the user could take next based on the image. Do not return a structured JSON object, just answer naturally as you would to a user. Be concise and brief.`

    if (this.useOllama) {
      // Ollama takes raw base64 images; no MIME type is needed.
      const text = await this.callOllamaVision(prompt, [base64Image])
      return { text: text.trim(), timestamp: Date.now() }
    }

    if (!this.model) {
      throw new Error("Gemini model is not initialized. Provide a Gemini API key or enable Ollama mode.")
    }

    // Pick a MIME type matching the file extension rather than always sending
    // image/png for any file; unknown extensions keep the previous png
    // default, so existing callers see no behavior change.
    const ext = imagePath.split(".").pop()?.toLowerCase()
    const mimeByExt: Record<string, string> = {
      jpg: "image/jpeg",
      jpeg: "image/jpeg",
      webp: "image/webp",
      gif: "image/gif",
      png: "image/png"
    }
    const imagePart = {
      inlineData: {
        data: base64Image,
        mimeType: (ext && mimeByExt[ext]) || "image/png"
      }
    }

    const result = await this.model.generateContent([prompt, imagePart])
    const response = await result.response
    const text = response.text()
    return { text, timestamp: Date.now() }
  } catch (error) {
    console.error("Error analyzing image file:", error)
    throw error
  }
}

Expand Down Expand Up @@ -357,4 +468,4 @@ export class LLMHelper {
return { success: false, error: error.message };
}
}
}
}
30 changes: 24 additions & 6 deletions electron/ScreenshotHelper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

import path from "node:path"
import fs from "node:fs"
import { app } from "electron"
import { app, desktopCapturer } from "electron"
import { v4 as uuidv4 } from "uuid"
import screenshot from "screenshot-desktop"

export class ScreenshotHelper {
private screenshotQueue: string[] = []
Expand All @@ -28,10 +27,10 @@ export class ScreenshotHelper {

// Create directories if they don't exist
if (!fs.existsSync(this.screenshotDir)) {
fs.mkdirSync(this.screenshotDir)
fs.mkdirSync(this.screenshotDir, { recursive: true })
}
if (!fs.existsSync(this.extraScreenshotDir)) {
fs.mkdirSync(this.extraScreenshotDir)
fs.mkdirSync(this.extraScreenshotDir, { recursive: true })
}
}

Expand Down Expand Up @@ -84,11 +83,30 @@ export class ScreenshotHelper {
// Add a small delay to ensure window is hidden
await new Promise(resolve => setTimeout(resolve, 100))

// Use Electron's desktopCapturer to get screen sources
const sources = await desktopCapturer.getSources({
types: ['screen'],
thumbnailSize: { width: 1024, height: 1024 }
})

if (sources.length === 0) {
throw new Error("No screen sources available")
}

// Get the primary screen (first source)
const primaryScreen = sources[0]
const image = primaryScreen.thumbnail

if (image.isEmpty()) {
throw new Error("Failed to capture screen - image is empty")
}

let screenshotPath = ""
const screenshotBuffer = image.toPNG()

if (this.view === "queue") {
screenshotPath = path.join(this.screenshotDir, `${uuidv4()}.png`)
await screenshot({ filename: screenshotPath })
await fs.promises.writeFile(screenshotPath, screenshotBuffer)

this.screenshotQueue.push(screenshotPath)
if (this.screenshotQueue.length > this.MAX_SCREENSHOTS) {
Expand All @@ -103,7 +121,7 @@ export class ScreenshotHelper {
}
} else {
screenshotPath = path.join(this.extraScreenshotDir, `${uuidv4()}.png`)
await screenshot({ filename: screenshotPath })
await fs.promises.writeFile(screenshotPath, screenshotBuffer)

this.extraScreenshotQueue.push(screenshotPath)
if (this.extraScreenshotQueue.length > this.MAX_SCREENSHOTS) {
Expand Down
5 changes: 4 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -155,5 +155,8 @@
"tailwind-merge": "^2.5.4",
"tesseract.js": "^5.0.5",
"uuid": "^11.0.3"
}
},
"bin": {
"start-cluely": "./start.sh"
}
}
3 changes: 3 additions & 0 deletions start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env bash
# Launcher for Free Cluely: runs `npm start` from the repository root.
# Fail fast on errors, unset variables, and broken pipelines.
set -euo pipefail

# Resolve the project directory from the script's own location instead of a
# hardcoded per-user path (/home/nandita/...), so the launcher works from any
# checkout and when invoked via the package.json "bin" entry.
cd "$(dirname "$0")"
echo "Launching Free Cluely... hold tight!"
npm start